Uncovering immune evasion mechanisms of leukemic cells from natural killer cells. In this sub-project RNA sequencing of leukemic cells is used to discover transcriptional changes resulting in NK cell-resistance. There is another sub-project using ATAC-seq (transposase-accessible chromatin) to epigenetic changes resulting in NK cell-resistance - this is not part of this report.
Taken from discussion summary (‘21009_Porject discussion summary.docx’):
Biological and technical replicates
Experiment Timepoints
RNA-seq methods: QuantSeq 3’ mRNA-Seq - 1x50bp HiSeq 3000/4000.
Pre-processing: ADD methods+versions for preprocessing
Downstream analysis: ADD methods+versions for downstream!
MultiQC report ADD full multiqc report description. Few observations:
# importing only key functions that are actually used - not to polute namespace!
import::from(readr, read_csv)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup) # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)
import::from(.from = SummarizedExperiment, colData, assay) # used in every .Rmd
import::from(.from = tableone, CreateTableOne)# importing only key functions that are actually used - not to polute namespace!
import::from(.from = readr, read_csv, cols)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup) # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)
import::from(.from = GenomicFeatures, makeTxDbFromGFF)
import::from(.from = AnnotationDbi, annot_db_keys = keys, annot_db_select = select)
import::from(.from = DESeq2, .all=TRUE)
import::from(.from = tximport, tximport)
import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/generateEnsemblAnnotation.R"), "generateEnsemblAnnotation", .character_only=TRUE) # used for filteringexperiment_name="nk_tum_immunoedit_complete" # this can be a subset analysis - e.g. just batch1,...
experiment_design="1" # used only to construct initial dds object
abs_filt_samples=3
# parameters for annotation
biomart_host = "http://nov2020.archive.ensembl.org"
#biomart_host = "https://www.ensembl.org"
biomart_Ens_version = "Ensembl Genes 102"
biomart_dataset="mmusculus_gene_ensembl"
# directories
output_dir = paste0("results_dir", "/",experiment_name,"/")
dir.create(output_dir)Preparing metadata and expression data and combining into a dds (DESeqDataSet object) that is ready for analyses. dds object is further pre-filtered to remove lowly expressed genes (FPM (fragments per million mapped fragments) > 1 in at least 3 samples). Salmon quantification (quant.sf) transcript abundance estimates (transcript expression) is imported and converted to gene expression using tximport.
# Preparing metadata
rnaseq_metadata_file <- here::here(config$metadata$rnaseq$rnaseq_metadata)
rnaseq_metadata_df_raw <- readr::read_csv(file = rnaseq_metadata_file) # 133 samples; 42 samples from other project?
quantseq_files <- here::here(config$metadata$rnaseq$rnaseq_salmon_files)
# salmon quant files
quant_files_md5sums_raw <- readr::read_table(file = quantseq_files, col_names = c("md5sum", "file_path"))
quant_files_md5sums <- quant_files_md5sums_raw %>%
dplyr::mutate(absolute_quant_files_path = gsub(pattern = "\\./", replacement = paste0(dirname(quantseq_files), "/"), file_path),
sample_name = gsub(pattern = "(\\./)(S_.+)(_0_.+)", replacement = "\\2", file_path)) %>%
# for batch3
dplyr::mutate(sample_name = gsub(pattern = "(\\./)(R.+)(_S.+)", replacement = "\\2", sample_name)) %>%
# EK were samples from other experiment - they were removed from quant.sf list, but in case remove them here as well!
dplyr::filter(!grepl(pattern = "\\./EK.+", x = sample_name)) %>%
dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(S_.+)(_quant/quant.sf)", replacement = "\\2", x = file_path)) %>%
# for batch 3
dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(R.+_S.+)(_quant/quant.sf)", replacement = "\\2", x = bsf_sample_name))
# experiment info - match experiment to run
experiment_info_batch1_2_raw <- readr::read_csv("datasets/metadata/rnaseq_merge_sample_sheet.csv", col_types =cols(.default = "c"))
experiment_info_batch3_raw <- readr::read_csv("datasets/metadata/rnaseq_sample_sheet_batch3.csv", col_types = cols(.default = "c"))
experiment_info_raw <- dplyr::bind_rows(experiment_info_batch1_2_raw, experiment_info_batch3_raw)
# EK samples are from different experiment
experiment_info <- experiment_info_raw %>%
dplyr::filter(!grepl(pattern = "EK.+", x = original_sample_name)) %>%
dplyr::rename(library_name = experiment,
bsf_sample_name = sample_name) %>%
# fixing library_name for experiment EXP9.8
dplyr::mutate(library_name = dplyr::if_else(library_name == "BSF_1241", "R0128_L5464", library_name)) %>%
dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=original_sample_name) ~ paste0(original_sample_name, "_", library_name),
TRUE ~ paste0("S_", original_sample_name, "_", library_name)))
# loading metadata and adding experiment and salmon data
rnaseq_metadata_df <- rnaseq_metadata_df_raw %>%
dplyr::mutate(condition_complete = condition) %>%
dplyr::mutate(condition = gsub(pattern = "(.+)(_Day.+)", replacement = "\\1", condition_complete)) %>%
dplyr::mutate(condition = gsub(pattern = "\\+", replacement = "_plus_", condition),
timepoint_cell_harvesting = gsub(pattern = " ", replacement = "_", timepoint_cell_harvesting),
experiment = gsub(pattern = " ", replacement = "", original_experiment)) %>%
dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=sample_name) ~ paste0(sample_name, "_", library_name),
TRUE ~ paste0("S_", sample_name, "_", library_name)))
quant_files_md5sums_annot <- quant_files_md5sums %>%
dplyr::left_join(., experiment_info, by = "bsf_sample_name") %>%
dplyr::select(-sample_name, -library_name)
rnaseq_metadata_df_annot <- rnaseq_metadata_df %>%
dplyr::left_join(., quant_files_md5sums_annot, by="sample_name_unique") %>%
dplyr::mutate(original_sample_name = sample_name) %>%
dplyr::mutate(sample_name = sample_name_unique) %>%
dplyr::mutate(filenames = sample_name_unique)
#table(rnaseq_metadata_df_annot$original_experiment)
#table(rnaseq_metadata_df_annot$cell_line_label)
rnaseq_metadata_df_annot_reduced <- rnaseq_metadata_df_annot %>%
#dplyr::filter(original_experiment == "EXP9_6") %>%
#dplyr::filter(condition %in% c("Tumor_only", "Tumor_plus_NK_Day14")) %>%
dplyr::mutate(technical_replicate = factor(technical_replicate, levels = c("1", "2", "3")),
condition = factor(condition, levels = c("Tumor_only", "Tumor_plus_NK", "Tumor_plus_IFNg", "Tumor_plus_WT_NK", "Tumor_plus_PrfKO_NK")),
timepoint_cell_harvesting = factor(timepoint_cell_harvesting, levels = c("timepoint_0", "timepoint_1", "timepoint_2")),
cell_line_label = as.factor(cell_line_label),
experiment = factor(experiment, levels = c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7", "EXP9.8")),
condition_tp = paste0(condition,"_",timepoint_cell_harvesting)) %>%
dplyr::mutate(condition_tp = factor(condition_tp, levels = c("Tumor_only_timepoint_0",
"Tumor_only_timepoint_1", "Tumor_only_timepoint_2",
"Tumor_plus_NK_timepoint_1", "Tumor_plus_NK_timepoint_2",
"Tumor_plus_WT_NK_timepoint_2",
"Tumor_plus_IFNg_timepoint_2", "Tumor_plus_PrfKO_NK_timepoint_2"))) %>%
#fixing day_cell_harvesting
dplyr::mutate(day_cell_harvesting = gsub(pattern = " ", replacement = "", day_cell_harvesting)) %>%
dplyr::mutate(day_cell_harvesting = factor(day_cell_harvesting, levels=c("Day0", "Day4", "Day6", "Day10", "Day14", "Day16")))
metadata <- rnaseq_metadata_df_annot_reduced #cond_data
#rm(reseq_metadata)
rownames(metadata) <- metadata$filenames
# use alternative sample names for multiQC
# renaming for the multiQC report
# metadata_multiqc <- metadata %>%
# dplyr::select(bsf_sample_name, experiment, technical_replicate, cell_line_label, condition, timepoint_cell_harvesting) %>%
# dplyr::mutate(cell_line_label = paste0("cl", cell_line_label),
# condition = gsub(pattern = "Tumor", replacement = "T", x = condition),
# timepoint_cell_harvesting = gsub(pattern = "timepoint", replacement = "tp", x=timepoint_cell_harvesting),
# technical_replicate = paste0("repl",technical_replicate)) %>%
# dplyr::mutate(alt_name = paste(experiment, cell_line_label, technical_replicate, condition, timepoint_cell_harvesting, sep="-")) %>%
# dplyr::select(bsf_sample_name, alt_name)
#
# readr::write_tsv(metadata_multiqc, file = paste0(OUTPUT_DIR, experiment_name, "_metadata_multiqc.tsv"))#TODO:
# [ ] - add md5sums for tx2gene_file
gtf_file <- file.path(annotation_dir, "Mus_musculus.GRCm38.102.gtf") # needed only once to generate tx2gene.tsv
tx2gene_file <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.tsv")
tx2gene_file_md5sum <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.md5sum")
if (!file.exists(tx2gene_file)) {
message(tx2gene_file, " does not exist: generating!")
txDB <- GenomicFeatures::makeTxDbFromGFF(gtf_file, format="gtf")
# AnnotationDbi::keys
tx_name <- annot_db_keys(txDB, keytype="TXNAME")
# AnnotationDbi::select
tx2gene_gtf <- annot_db_select(txDB,
keys = tx_name,
columns="GENEID",
keytype = "TXNAME")
# saving tx2gene into destdir
write.table(tx2gene_gtf,
file = tx2gene_file,
sep = "\t",
col.names = FALSE,
row.names = FALSE,
quote = FALSE)
tx2gene_md5sum <- digest::digest(tx2gene_file, file=TRUE, algo="md5")
write.table(paste(tx2gene_md5sum, basename(tx2gene_file), collapse = "\t"),
file = tx2gene_file_md5sum,
sep = "\t",
col.names = FALSE,
row.names = FALSE,
quote = FALSE)
} else {
message(tx2gene_file, " exists: loading!")
tx2gene <- readr::read_tsv(tx2gene_file,
col_names = c("tx_id", "gene_id"),
show_col_types = FALSE)
}precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData")
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
if(!file.exists(precomputed_objects_file)){
cat("RNA objects file '", precomputed_objects_filename, "' does not exist in the OUTPUT_DIR...\n")
cat("generating", precomputed_objects_filename, "\n")
# Generating dds and filtered dds objects ----
#require(DESeq2)
# here there is a single sample so we use ~1.
# expect a warning that there is only a single sample...
# generating dds object ----
# txi_object <- generateTxi(metadata_object=metadata,
# column_names=c("absolute_quant_files_path", "filenames"),
# tx2gene = tx2gene)
quant_files <- metadata$absolute_quant_files_path
names(quant_files) <- metadata$sample_name
txi_object <- tximport::tximport(quant_files,
type="salmon",
tx2gene = tx2gene,
ignoreTxVersion = TRUE)
sum(rownames(metadata) == colnames(txi_object$counts)) # check if names match between metadata and counts data
cat("... txi_object\n")
dds <- DESeq2::DESeqDataSetFromTximport(txi_object,
colData = metadata,
design = as.formula(paste0("~", experiment_design)))
cat("... dds\n")
dds <- DESeq2::estimateSizeFactors(dds)
dds <- DESeq2::DESeq(dds, minReplicatesForReplace=Inf) # do not replace outliers based on replicates
# filtering dds
cat("... dds filtering\n")
dds_filt <- filterDatasets(dds, abs_filt = TRUE,
abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
dds_filt <- DESeq2::estimateSizeFactors(dds_filt)
# Generate annotation file using biomaRt
#FIXME:
# Issues with Curl when using https:// and http:// throws a warning:
# Warning: Ensembl will soon enforce the use of https.
# Ensure the 'host' argument includes "https://"
cat("... fetching annotation from biomart\n")
ensemblAnnot <- generateEnsemblAnnotation(ensembl_ids = rownames(dds),
host=biomart_host,
version=biomart_Ens_version,
dataset=biomart_dataset)
cat("saving...", precomputed_objects_file, "\n")
cat("...into:", output_dir, "\n")
save(dds, dds_filt, ensemblAnnot, file = precomputed_objects_file)
} else{
cat("RNA objects file '", precomputed_objects_filename, "' exist...loading\n")
load(precomputed_objects_file)
}## RNA objects file ' nk_tum_immunoedit_complete_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_file)){
# transformation
log2_norm_filt <- DESeq2::normTransform(dds_filt)
vsd_filt <- DESeq2::vst(dds_filt, blind = TRUE) # blind = TRUE for QC
#rld_filt <- rlog(dds_filt, blind = TRUE)
#rld_filt - too big for >100 samples; skipping!
#rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
save(log2_norm_filt, vsd_filt,
file = precomputed_transf_objects_file)
} else{
load(precomputed_transf_objects_file)
}Parameters for dataset preparation:
| Parameter | Value | Decription |
|---|---|---|
| experiment_name | nk_tum_immunoedit_complete | this can be a subset analysis - e.g. just batch1,… |
| experiment_design | 1 | used only to construct initial dds object |
| abs_filt_samples | 3 | # at least in N samples, which is a smallest group size |
| biomart_host | http://nov2020.archive.ensembl.org | |
| biomart_Ens_version | Ensembl Genes 102 | |
| biomart_dataset | mmusculus_gene_ensembl | |
Objects overview:
dds_objects_overview <- data.frame(dds_object = c("dds", "dds_filt"),
n_samples = c(ncol(dds), ncol(dds_filt)),
n_genes = c(nrow(dds), nrow(dds_filt)))
dds_objects_overview %>%
kableExtra::kbl(caption = "dds objects overview") %>%
kableExtra::kable_classic(full_width = T)| dds_object | n_samples | n_genes |
|---|---|---|
| dds | 169 | 51149 |
| dds_filt | 169 | 15430 |
# DT::datatable(dds_objects_overview,
# options = list(pageLength = 10),
# caption = 'Table 1: Samples in validation set')#import::from(.from = SummarizedExperiment, colData, assay)
import::from(.from = DT, datatable)
import::from(.from = ggplot2, .all=TRUE)
import::from(.from = DataExplorer, plot_bar)
#import::from(.from = tableone, CreateTableOne)
import::from(.from = dplyr, group_by, count)experiment_name="nk_tum_immunoedit_complete"
output_dir <- paste0(results_dir, "/",experiment_name,"/") # "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/" #
# results_dir defined in the nk_immunoediting.Rmd file
eda_plots_dir <- paste0(output_dir, "eda_plots/")
dir.create(eda_plots_dir)
# raw and filtered dds
precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData") #"nk_tum_immunoedit_complete_dds_objects.RData" #
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
# log2 and vsd transformed
precomputed_transf_objects_filename <- "nk_tum_immunoedit_complete_log2_vsd_filt_objects.RData"
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
# check if object loaded if not load
if(!exists(precomputed_objects_file)){
load(precomputed_objects_file)
load(precomputed_transf_objects_file)
}
metadata <- tibble::as_tibble(SummarizedExperiment::colData(dds_filt))
key_metadata <- metadata %>%
dplyr::select(experiment, condition_tp,condition, timepoint_cell_harvesting, day_cell_harvesting, cell_line_label, technical_replicate)
# saving metadata
openxlsx::write.xlsx(metadata, file = file.path(output_dir, paste0(experiment_name, "_metadata.xlsx")))Metadata overview:
# 'KeyTable', ,'FixedHeader', 'Responsive', 'Scroller', 'ColReorder'
# to add show entries - https://stackoverflow.com/questions/64976684/how-to-have-buttons-and-a-show-entries-using-datatables-dt-in-rmarkdown
DT::datatable(data = metadata,
caption = 'Metadata overview',
filter = 'top',
class = 'hover',
extensions = c('Buttons'), options = list(
pageLength = 5,
#autoWidth = TRUE
#fixedHeader = TRUE,
scrollX = TRUE,
#scrollY = TRUE,
#scrollY = 200,
#deferRender = TRUE,
#scroller = TRUE,
#colReorder = TRUE,
#keys = TRUE,
searchHighlight = TRUE,
dom = 'Blfrtip',
buttons = list('copy', 'colvis', list(
extend = 'collection',
buttons = c('csv', 'excel', 'pdf'),
text = 'Download'
))
)
)# 1. metadata overview
# 2. composition per experiment
# 3. relationship (table(x, y)) - to see how many variables we have in each category; data imbalance!?
# frequency of key variables ----
# fig.show='hide' - to supress plotting here
key_variables_freq <- DataExplorer::plot_bar(key_metadata, order_bar=TRUE, ggtheme = theme_bw(), title = "Frequency of key variables")# table of variable frequency per experiment ----
key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata, -experiment)),
strata = c("experiment"),
data = key_metadata)
# relate variables ----
# cat_cat_plot <- ggplot(data = key_metadata, color=cell_line_label) +
# geom_count(mapping = aes(x = experiment, y = condition_tp)) #+ facet_wrap(~cell_line_label)
cat_cat_counts <- key_metadata %>%
dplyr::group_by(cell_line_label) %>%
dplyr::count(experiment, condition_tp)
cat_cat_plot <- ggplot(data = cat_cat_counts, mapping = aes(x = experiment, y = condition_tp, group=cell_line_label)) +
# geom_tile(mapping = aes(fill = n))
geom_point(mapping=aes(size=n, color=cell_line_label), position = position_dodge(width=0.5)) +
scale_size(breaks = c(0,1,2,3,4,5,6)) +
# ggrepel::geom_label_repel(aes(label = n),
# box.padding = 0.35,
# point.padding = 0.5,
# segment.color = 'grey50') +
ggtitle("Number of samples pre condition/experiment.") +
theme_bw()Frequency of key variable:
print(key_variables_freq$page_1)ggsave(filename = paste0(eda_plots_dir, "key_variables_freq_plot.png"),
plot=key_variables_freq$page_1,
width = 20, height = 20, units = "cm")Overview of number of samples in different categories (experiment, condition,…).:
#print(key_variables_tableOne$CatTable)
tableone::kableone(key_variables_tableOne$CatTable,
caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
kableExtra::kable_material(c("striped", "hover"))| EXP9.4 | EXP9.5 | EXP9.6 | EXP9.7 | EXP9.8 | p | test | |
|---|---|---|---|---|---|---|---|
| n | 18 | 36 | 39 | 40 | 36 | ||
| condition_tp (%) | <0.001 | ||||||
| Tumor_only_timepoint_0 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| Tumor_only_timepoint_1 | 6 (33.3) | 12 (33.3) | 12 (30.8) | 12 (30.0) | 6 (16.7) | ||
| Tumor_only_timepoint_2 | 0 ( 0.0) | 0 ( 0.0) | 4 (10.3) | 4 (10.0) | 6 (16.7) | ||
| Tumor_plus_NK_timepoint_1 | 6 (33.3) | 12 (33.3) | 12 (30.8) | 12 (30.0) | 0 ( 0.0) | ||
| Tumor_plus_NK_timepoint_2 | 6 (33.3) | 12 (33.3) | 11 (28.2) | 12 (30.0) | 0 ( 0.0) | ||
| Tumor_plus_WT_NK_timepoint_2 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| Tumor_plus_IFNg_timepoint_2 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| Tumor_plus_PrfKO_NK_timepoint_2 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| condition (%) | <0.001 | ||||||
| Tumor_only | 6 (33.3) | 12 (33.3) | 16 (41.0) | 16 (40.0) | 18 (50.0) | ||
| Tumor_plus_NK | 12 (66.7) | 24 (66.7) | 23 (59.0) | 24 (60.0) | 0 ( 0.0) | ||
| Tumor_plus_IFNg | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| Tumor_plus_WT_NK | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| Tumor_plus_PrfKO_NK | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| timepoint_cell_harvesting (%) | <0.001 | ||||||
| timepoint_0 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| timepoint_1 | 12 (66.7) | 24 (66.7) | 24 (61.5) | 24 (60.0) | 6 (16.7) | ||
| timepoint_2 | 6 (33.3) | 12 (33.3) | 15 (38.5) | 16 (40.0) | 24 (66.7) | ||
| day_cell_harvesting (%) | <0.001 | ||||||
| Day0 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 6 (16.7) | ||
| Day4 | 12 (66.7) | 24 (66.7) | 24 (61.5) | 0 ( 0.0) | 6 (16.7) | ||
| Day6 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 24 (60.0) | 0 ( 0.0) | ||
| Day10 | 6 (33.3) | 12 (33.3) | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | ||
| Day14 | 0 ( 0.0) | 0 ( 0.0) | 15 (38.5) | 0 ( 0.0) | 24 (66.7) | ||
| Day16 | 0 ( 0.0) | 0 ( 0.0) | 0 ( 0.0) | 16 (40.0) | 0 ( 0.0) | ||
| cell_line_label (%) | <0.001 | ||||||
| A | 0 ( 0.0) | 9 (25.0) | 10 (25.6) | 10 (25.0) | 18 (50.0) | ||
| B | 0 ( 0.0) | 9 (25.0) | 10 (25.6) | 10 (25.0) | 0 ( 0.0) | ||
| C | 9 (50.0) | 9 (25.0) | 10 (25.6) | 10 (25.0) | 0 ( 0.0) | ||
| D | 9 (50.0) | 9 (25.0) | 9 (23.1) | 10 (25.0) | 18 (50.0) | ||
| technical_replicate (%) | 0.998 | ||||||
| 1 | 6 (33.3) | 12 (33.3) | 16 (41.0) | 16 (40.0) | 12 (33.3) | ||
| 2 | 6 (33.3) | 12 (33.3) | 12 (30.8) | 12 (30.0) | 12 (33.3) | ||
| 3 | 6 (33.3) | 12 (33.3) | 11 (28.2) | 12 (30.0) | 12 (33.3) |
Plot of number of samples in each condition across different experiments:
print(cat_cat_plot)ggsave(filename = paste0(eda_plots_dir, "key_variables_relate_plot.png"),
plot=cat_cat_plot,
width = 20, height = 20, units = "cm")# import::from(.from = SummarizedExperiment, colData)
# import::from(.from = DT, datatable)
# import::from(.from = ggplot2, .all=TRUE)
# import::from(.from = DataExplorer, plot_bar)
# import::from(.from = tableone, CreateTableOne)
# import::from(.from = dplyr, group_by, count)
#import::from(.from = SummarizedExperiment, colData, assay)
import::from(.from = variancePartition, fitExtractVarPartModel, sortCols, plotVarPart)
import::from(.from = doParallel, registerDoParallel)
import::from(.from = parallel, makeCluster, stopCluster)
# import utils scripts
import::from(.from = here::here("utils/rnaSelectTopVarGenes.R"), "rnaSelectTopVarGenes", .character_only=TRUE)
import::from(.from = here::here("utils/edaFunctions.R"), "varPartitionEstimate", "generatePCA", "pcaExtractVariance", "pcaPlotVariance", "pcaCorrPCs", "pcaCorrPCsPlot", .character_only=TRUE)experiment_name="nk_tum_immunoedit_complete"
output_dir <- paste0(results_dir, "/",experiment_name,"/") # "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/"
# results_dir defined in the nk_immunoediting.Rmd file
eda_plots_dir <- paste0(output_dir, "eda_plots/")
dir.create(eda_plots_dir)
# raw and filtered dds
precomputed_objects_filename <-paste0(experiment_name, "_dds_objects.RData") # "nk_tum_immunoedit_complete_dds_objects.RData"
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
# log2 and vsd transformed
precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData") #"nk_tum_immunoedit_complete_log2_vsd_filt_objects.RData"
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
# check if object loaded if not load
if(!exists(precomputed_objects_file)){
load(precomputed_objects_file)
load(precomputed_transf_objects_file)
}
metadata <- tibble::as_tibble(SummarizedExperiment::colData(dds_filt))
key_metadata <- metadata %>%
dplyr::select(experiment, condition_tp,condition, timepoint_cell_harvesting, day_cell_harvesting, cell_line_label, technical_replicate)
# saving metadata
#openxlsx::write.xlsx(metadata, file = file.path(output_dir, paste0(experiment_name, "_metadata.xlsx")))
# preparing vsd per experiment ----
experiments <- unique(as.character(vsd_filt$experiment))
vsd_filt_perExp <- purrr::map(.x = experiments, .f = function(experiment_name){
vsd_filt[,vsd_filt$experiment == experiment_name]
}) %>% setNames(experiments)
#lapply(vsd_filt_perExp, dim)cond_interest <- "condition_tp" # column of interest for PCA plots etc.
cond_interest_varPart <- c("condition_tp", "condition", "timepoint_cell_harvesting", "cell_line_label", "technical_replicate", "experiment")
cond_interest_varPart_corr <- c("condition_tp", "cell_line_label", "experiment", "technical_replicate")
# #~ (1|technical_replicate) + (1|experiment) + (1|cell_line_label) + (1|condition_tp)
fitform_partVariance <- ~(1 | technical_replicate) + (1 | experiment) + (1 | cell_line_label) + (1 | timepoint_cell_harvesting) + (1 | condition)
padj_cutoff = 0.05
log2FC_cutoff = 0.58 #(FC=1.5); log2FC=1.0 # (FC=2)
var_expl_needed <- 0.6 # at least 60% variance explained needed
transf_object <- vsd_filt # which object to use for pca, variance partition etc. in case there is e.g. log2norm, rlog,...# calculating overall variance partition ----
variance_partition_all <- varPartitionEstimate(transf_object = transf_object,
fitform_partVariance = fitform_partVariance,
ntop_genes=1000,
ncores = 12)## Selecting 1000 Ntop genes based on variance.
# calculating variance partition per experiment ----
fitform_partVariance_perExperiment <- ~(1 | technical_replicate) + (1 | cell_line_label) + (1 | timepoint_cell_harvesting) + (1 | condition)
variance_partition_perExperiment <- purrr::map(.x=names(vsd_filt_perExp), .f=function(experiment_name){
varPartitionEstimate(transf_object = vsd_filt_perExp[[experiment_name]],
fitform_partVariance = fitform_partVariance_perExperiment,
ntop_genes=1000,
ncores = 12)
}) %>% setNames(names(vsd_filt_perExp))## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
## Selecting 1000 Ntop genes based on variance.
#print(key_variables_tableOne$CatTable)
variance_partition_all_plot <- variance_partition_all$varPart_plot_annot + ggtitle("Variance partition all experiments")
print(variance_partition_all_plot)ggsave(filename = paste0(eda_plots_dir, "variance_partition_all_plot.png"),
plot=variance_partition_all_plot,
width = 20, height = 20, units = "cm")Variance explained by different variables of interest. There seems to be a lot of variation coming from different experiments and cell lines. This will need to be accounted in the design or downstream batch effect removal.
#print(key_variables_tableOne$CatTable)
variance_partition_all$varPart_stats %>%
kableExtra::kbl(caption="Variance partition table - all experiments") %>%
kableExtra::kable_material(c("striped", "hover"))| variable | median_varExplained | mean_varExplained | IQR_varExplained | max_varExplained | min_varExplained |
|---|---|---|---|---|---|
| experiment | 26.5 | 35.9 | 52.3 | 99.7 | 0.0 |
| cell_line_label | 2.8 | 13.4 | 10.4 | 96.6 | 0.0 |
| condition | 0.8 | 7.7 | 8.0 | 82.8 | 0.0 |
| timepoint_cell_harvesting | 0.0 | 1.2 | 0.9 | 33.3 | 0.0 |
| technical_replicate | 0.0 | 0.3 | 0.2 | 6.8 | 0.0 |
| Residuals | 34.1 | 41.5 | 46.0 | 100.0 | 0.3 |
*Please note color coding varies between variance_partition_perExperiment
variance_partition_perExperiment_plots <- purrr::map(.x=names(variance_partition_perExperiment), .f = function(experiment_name){
variance_partition_perExperiment[[experiment_name]]$varPart_plot_annot + ggtitle(paste0("Variance partition experiment - ", experiment_name))
}) %>% setNames(names(variance_partition_perExperiment))
variance_partition_perExperiment_plots <- purrr::map(.x=names(variance_partition_perExperiment), .f = function(experiment_name){
print(variance_partition_perExperiment_plots[[experiment_name]])
ggsave(filename = paste0(eda_plots_dir, paste0("variance_partition_", experiment_name,"_plot.png")),
plot=variance_partition_perExperiment_plots[[experiment_name]],
width = 20, height = 20, units = "cm")
}) %>% setNames(names(variance_partition_perExperiment))# calculating how many components are needed ----
transf_object_counts_ntop <- rnaSelectTopVarGenes(SummarizedExperiment::assay(transf_object), ntop = 1000, type = "var")## Selecting 1000 Ntop genes based on variance.
pca_object <- stats::prcomp(t(transf_object_counts_ntop), center = TRUE)
pca_results <- pcaExtractVariance(pca_object)
pca_scree_cumVarExpl <- pcaPlotVariance(pca_results, var_expl_needed = var_expl_needed)
min_components_needed <- which(pca_results$cummulative_variance > var_expl_needed)[1] # min components to get var_expl_needed
# calculating components correlation with variables ----
#TODO:
# fix - correlation between PCs and variables of interest! - fix sample naming!
# PC_corr <- pcaCorrPCs(pca_object, metadata[cond_interest_varPart_corr], pcs = 1:min_components_needed, pval_exact = TRUE)
# PC_corr_plot <- pcaCorrPCsPlot(PC_corr)
# calculating overall PCA ----
condition_tp_colors <- RColorBrewer::brewer.pal(length(unique(metadata$condition_tp)),"Dark2")
names(condition_tp_colors) <- levels(metadata$condition_tp)
pca_all <- generatePCA(transf_object = transf_object,
cond_interest_varPart = cond_interest_varPart,
color_variable = "condition_tp",
shape_variable = "experiment",
ntop_genes = 1000) +
ggtitle("Overall PCA") +
ggplot2::scale_color_manual(values = condition_tp_colors)
pca_perExperiment_plots <- purrr::map(.x=names(vsd_filt_perExp), .f = function(experiment_name){
pca_plot <- generatePCA(transf_object = vsd_filt_perExp[[experiment_name]],
cond_interest_varPart = cond_interest_varPart,
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000)
pca_plot + ggtitle(experiment_name) +
# make universal color, shape across the whole report!
ggplot2::scale_shape_manual(values = c("A" = 16, "B" = 15, "C"=17, "D"=3)) +
ggplot2::scale_color_manual(values = condition_tp_colors)
}) %>% setNames(names(vsd_filt_perExp))
combined_pca_plot <- ggpubr::ggarrange(pca_perExperiment_plots$EXP9.4,
pca_perExperiment_plots$EXP9.5,
pca_perExperiment_plots$EXP9.6,
pca_perExperiment_plots$EXP9.7,
pca_perExperiment_plots$EXP9.8,
nrow=3, ncol=2,
common.legend = FALSE) #patchwork::wrap_plots(pca_perExperiment_plots, ncol=2)print(pca_scree_cumVarExpl)ggsave(filename = paste0(eda_plots_dir, "pca_scree_cumVarExpl_plot.png"),
plot=pca_scree_cumVarExpl,
width = 20, height = 20, units = "cm")Overall PCA plot:
print(pca_all)ggsave(filename = paste0(eda_plots_dir, "pca_all_plot.png"),
plot=pca_all,
width = 20, height = 20, units = "cm")PCA plot per experiment:
print(combined_pca_plot)ggsave(filename = paste0(eda_plots_dir, "pca_perExperiment_plot.png"),
plot=combined_pca_plot,
width = 20, height = 20, units = "cm")# importing only key functions that are actually used - not to polute namespace!
import::from(.from = readr, read_csv, cols)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup) # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)
import::from(.from = GenomicFeatures, makeTxDbFromGFF)
import::from(.from = AnnotationDbi, annot_db_keys = keys, annot_db_select = select)
import::from(.from = DESeq2, .all=TRUE)
import::from(.from = tximport, tximport)
import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/generateEnsemblAnnotation.R"), "generateEnsemblAnnotation", .character_only=TRUE) # used for filtering#project setup
# loading config file ----
config_file <- file.path( "nk_tum_immunoediting_config.yaml") # params$config_file; params not found? ; change to project_config.yaml
config <- yaml::read_yaml(config_file)
#config <- yaml::read_yaml(params$config_file)
experiment_name="nk_tum_immunoedit_complete" # this can be a subset analysis - e.g. just batch1,...
#experiment_name="nk_tum_immunoedit_complete_ab2_AB_CD_sep" # this can be a subset analysis - e.g. just batch1,...
experiment_design="1" # used only to construct initial dds object
abs_filt_samples=3
# parameters for annotation
biomart_host = "http://nov2020.archive.ensembl.org"
#biomart_host = "https://www.ensembl.org"
biomart_Ens_version = "Ensembl Genes 102"
biomart_dataset="mmusculus_gene_ensembl"
# directories
output_dir = paste0("./results_dir", "/",experiment_name,"/")
dir.create(output_dir)Preparing metadata and expression data and combining into a dds (DESeqDataSet object) that is ready for analyses. dds object is further pre-filtered to remove lowly expressed genes (FPM (fragments per million mapped fragments) > 1 in at least 3 samples). Salmon quantification (quant.sf) transcript abundance estimates (transcript expression) is imported and converted to gene expression using tximport.
# Preparing metadata
rnaseq_metadata_file <- here::here(config$metadata$rnaseq$rnaseq_metadata)
rnaseq_metadata_df_raw <- readr::read_csv(file = rnaseq_metadata_file) # 133 samples; 42 samples from other project?
quantseq_files <- here::here(config$metadata$rnaseq$rnaseq_salmon_files)
# salmon quant files
quant_files_md5sums_raw <- readr::read_table(file = quantseq_files, col_names = c("md5sum", "file_path"))
quant_files_md5sums <- quant_files_md5sums_raw %>%
dplyr::mutate(absolute_quant_files_path = gsub(pattern = "\\./", replacement = paste0(dirname(quantseq_files), "/"), file_path),
sample_name = gsub(pattern = "(\\./)(S_.+)(_0_.+)", replacement = "\\2", file_path)) %>%
# for batch3
dplyr::mutate(sample_name = gsub(pattern = "(\\./)(R.+)(_S.+)", replacement = "\\2", sample_name)) %>%
# EK were samples from other experiment - they were removed from quant.sf list, but in case remove them here as well!
dplyr::filter(!grepl(pattern = "\\./EK.+", x = sample_name)) %>%
dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(S_.+)(_quant/quant.sf)", replacement = "\\2", x = file_path)) %>%
# for batch 3
dplyr::mutate(bsf_sample_name = gsub(pattern = "(\\./)(R.+_S.+)(_quant/quant.sf)", replacement = "\\2", x = bsf_sample_name))
# experiment info - match experiment to run
experiment_info_batch1_2_raw <- readr::read_csv("datasets/metadata/rnaseq_merge_sample_sheet.csv", col_types =cols(.default = "c"))
experiment_info_batch3_raw <- readr::read_csv("datasets/metadata/rnaseq_sample_sheet_batch3.csv", col_types = cols(.default = "c"))
experiment_info_raw <- dplyr::bind_rows(experiment_info_batch1_2_raw, experiment_info_batch3_raw)
# EK samples are from different experiment
experiment_info <- experiment_info_raw %>%
dplyr::filter(!grepl(pattern = "EK.+", x = original_sample_name)) %>%
dplyr::rename(library_name = experiment,
bsf_sample_name = sample_name) %>%
# fixing library_name for experiment EXP9.8
dplyr::mutate(library_name = dplyr::if_else(library_name == "BSF_1241", "R0128_L5464", library_name)) %>%
dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=original_sample_name) ~ paste0(original_sample_name, "_", library_name),
TRUE ~ paste0("S_", original_sample_name, "_", library_name)))
# loading metadata and adding experiment and salmon data
rnaseq_metadata_df <- rnaseq_metadata_df_raw %>%
dplyr::mutate(condition_complete = condition) %>%
dplyr::mutate(condition = gsub(pattern = "(.+)(_Day.+)", replacement = "\\1", condition_complete)) %>%
dplyr::mutate(condition = gsub(pattern = "\\+", replacement = "_plus_", condition),
timepoint_cell_harvesting = gsub(pattern = " ", replacement = "_", timepoint_cell_harvesting),
experiment = gsub(pattern = " ", replacement = "", original_experiment)) %>%
dplyr::mutate(sample_name_unique = dplyr::case_when(grepl(pattern = "^R", x=sample_name) ~ paste0(sample_name, "_", library_name),
TRUE ~ paste0("S_", sample_name, "_", library_name)))
quant_files_md5sums_annot <- quant_files_md5sums %>%
dplyr::left_join(., experiment_info, by = "bsf_sample_name") %>%
dplyr::select(-sample_name, -library_name)
rnaseq_metadata_df_annot <- rnaseq_metadata_df %>%
dplyr::left_join(., quant_files_md5sums_annot, by="sample_name_unique") %>%
dplyr::mutate(original_sample_name = sample_name) %>%
dplyr::mutate(sample_name = sample_name_unique) %>%
dplyr::mutate(filenames = sample_name_unique)
#table(rnaseq_metadata_df_annot$original_experiment)
#table(rnaseq_metadata_df_annot$cell_line_label)
rnaseq_metadata_df_annot_reduced <- rnaseq_metadata_df_annot %>%
#dplyr::filter(original_experiment == "EXP9_6") %>%
#dplyr::filter(condition %in% c("Tumor_only", "Tumor_plus_NK_Day14")) %>%
dplyr::mutate(technical_replicate = factor(technical_replicate, levels = c("1", "2", "3")),
condition = factor(condition, levels = c("Tumor_only", "Tumor_plus_NK", "Tumor_plus_IFNg", "Tumor_plus_WT_NK", "Tumor_plus_PrfKO_NK")),
timepoint_cell_harvesting = factor(timepoint_cell_harvesting, levels = c("timepoint_0", "timepoint_1", "timepoint_2")),
cell_line_label = as.factor(cell_line_label),
experiment = factor(experiment, levels = c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7", "EXP9.8")),
condition_tp = paste0(condition,"_",timepoint_cell_harvesting)) %>%
dplyr::mutate(condition_tp = factor(condition_tp, levels = c("Tumor_only_timepoint_0",
"Tumor_only_timepoint_1", "Tumor_only_timepoint_2",
"Tumor_plus_NK_timepoint_1", "Tumor_plus_NK_timepoint_2",
"Tumor_plus_WT_NK_timepoint_2",
"Tumor_plus_IFNg_timepoint_2", "Tumor_plus_PrfKO_NK_timepoint_2"))) %>%
#fixing day_cell_harvesting
dplyr::mutate(day_cell_harvesting = gsub(pattern = " ", replacement = "", day_cell_harvesting)) %>%
dplyr::mutate(day_cell_harvesting = factor(day_cell_harvesting, levels=c("Day0", "Day4", "Day6", "Day10", "Day14", "Day16")))
metadata <- rnaseq_metadata_df_annot_reduced #cond_data
#rm(reseq_metadata)
rownames(metadata) <- metadata$filenames
# use alternative sample names for multiQC
# renaming for the multiQC report
# metadata_multiqc <- metadata %>%
# dplyr::select(bsf_sample_name, experiment, technical_replicate, cell_line_label, condition, timepoint_cell_harvesting) %>%
# dplyr::mutate(cell_line_label = paste0("cl", cell_line_label),
# condition = gsub(pattern = "Tumor", replacement = "T", x = condition),
# timepoint_cell_harvesting = gsub(pattern = "timepoint", replacement = "tp", x=timepoint_cell_harvesting),
# technical_replicate = paste0("repl",technical_replicate)) %>%
# dplyr::mutate(alt_name = paste(experiment, cell_line_label, technical_replicate, condition, timepoint_cell_harvesting, sep="-")) %>%
# dplyr::select(bsf_sample_name, alt_name)
#
# readr::write_tsv(metadata_multiqc, file = paste0(OUTPUT_DIR, experiment_name, "_metadata_multiqc.tsv"))#TODO:
# [ ] - add md5sums for tx2gene_file
data_dir <- file.path("datasets")
annotation_dir = paste0(data_dir, "/annotations/")
gtf_file <- file.path(annotation_dir, "Mus_musculus.GRCm38.102.gtf") # needed only once to generate tx2gene.tsv
tx2gene_file <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.tsv")
tx2gene_file_md5sum <- file.path(annotation_dir, "EnsDb.Mmusculus.v102_tx2gene.md5sum")
if (!file.exists(tx2gene_file)) {
message(tx2gene_file, " does not exist: generating!")
txDB <- GenomicFeatures::makeTxDbFromGFF(gtf_file, format="gtf")
# AnnotationDbi::keys
tx_name <- annot_db_keys(txDB, keytype="TXNAME")
# AnnotationDbi::select
tx2gene_gtf <- annot_db_select(txDB,
keys = tx_name,
columns="GENEID",
keytype = "TXNAME")
# saving tx2gene into destdir. Potentially there is a typo here. The name should be tx2gene, not _gtf
write.table(tx2gene_gtf,
file = tx2gene_file,
sep = "\t",
col.names = FALSE,
row.names = FALSE,
quote = FALSE)
tx2gene_md5sum <- digest::digest(tx2gene_file, file=TRUE, algo="md5")
write.table(paste(tx2gene_md5sum, basename(tx2gene_file), collapse = "\t"),
file = tx2gene_file_md5sum,
sep = "\t",
col.names = FALSE,
row.names = FALSE,
quote = FALSE)
} else {
message(tx2gene_file, " exists: loading!")
tx2gene <- readr::read_tsv(tx2gene_file,
col_names = c("tx_id", "gene_id"),
show_col_types = FALSE)
}precomputed_objects_filename <- paste0("nk_tum_immunoedit_complete", "_dds_objects.RData")
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
precomputed_transf_objects_filename <- paste0("nk_tum_immunoedit_complete", "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
if(!file.exists(precomputed_objects_file)){
cat("RNA objects file '", precomputed_objects_filename, "' does not exist in the OUTPUT_DIR...\n")
cat("generating", precomputed_objects_filename, "\n")
# Generating dds and filtered dds objects ----
#require(DESeq2)
# here there is a single sample so we use ~1.
# expect a warning that there is only a single sample...
# generating dds object ----
# txi_object <- generateTxi(metadata_object=metadata,
# column_names=c("absolute_quant_files_path", "filenames"),
# tx2gene = tx2gene)
quant_files <- metadata$absolute_quant_files_path
names(quant_files) <- metadata$sample_name
txi_object <- tximport::tximport(quant_files,
type="salmon",
tx2gene = tx2gene,
ignoreTxVersion = TRUE)
sum(rownames(metadata) == colnames(txi_object$counts)) # check if names match between metadata and counts data
cat("... txi_object\n")
dds <- DESeq2::DESeqDataSetFromTximport(txi_object,
colData = metadata,
design = as.formula(paste0("~", experiment_design)))
cat("... dds\n")
dds <- DESeq2::estimateSizeFactors(dds) # Isn't the data already normalized by Salmon?
dds <- DESeq2::DESeq(dds, minReplicatesForReplace=Inf) # do not replace outliers based on replicates
# filtering dds
cat("... dds filtering\n")
dds_filt <- filterDatasets(dds, abs_filt = TRUE,
abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
dds_filt <- DESeq2::estimateSizeFactors(dds_filt)
# Generate annotation file using biomaRt
#FIXME:
# Issues with Curl when using https:// and http:// throws a warning:
# Warning: Ensembl will soon enforce the use of https.
# Ensure the 'host' argument includes "https://"
cat("... fetching annotation from biomart\n")
ensemblAnnot <- generateEnsemblAnnotation(ensembl_ids = rownames(dds),
host=biomart_host,
version=biomart_Ens_version,
dataset=biomart_dataset)
cat("saving...", precomputed_objects_file, "\n")
cat("...into:", output_dir, "\n")
save(dds, dds_filt, ensemblAnnot, file = precomputed_objects_file)
} else{
cat("RNA objects file '", precomputed_objects_filename, "' exist...loading\n")
load(precomputed_objects_file)
}## RNA objects file ' nk_tum_immunoedit_complete_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_file)){
# transformation
log2_norm_filt <- DESeq2::normTransform(dds_filt)
vsd_filt <- DESeq2::vst(dds_filt, blind = TRUE) # blind = TRUE for QC
#rld_filt <- rlog(dds_filt, blind = TRUE)
#rld_filt - too big for >100 samples; skipping!
#rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
save(log2_norm_filt, vsd_filt,
file = precomputed_transf_objects_file)
} else{
load(precomputed_transf_objects_file)
}Parameters for dataset preparation:
| Parameter | Value | Decription |
|---|---|---|
| experiment_name | nk_tum_immunoedit_complete | this can be a subset analysis - e.g. just batch1,… |
| experiment_design | 1 | used only to construct initial dds object |
| abs_filt_samples | 3 | # at least in N samples, which is a smallest group size |
| biomart_host | http://nov2020.archive.ensembl.org | |
| biomart_Ens_version | Ensembl Genes 102 | |
| biomart_dataset | mmusculus_gene_ensembl | |
Objects overview:
dds_objects_overview <- data.frame(dds_object = c("dds", "dds_filt"),
n_samples = c(ncol(dds), ncol(dds_filt)),
n_genes = c(nrow(dds), nrow(dds_filt)))
dds_objects_overview %>%
kableExtra::kbl(caption = "dds objects overview") %>%
kableExtra::kable_classic(full_width = T)| dds_object | n_samples | n_genes |
|---|---|---|
| dds | 169 | 51149 |
| dds_filt | 169 | 15430 |
# DT::datatable(dds_objects_overview,
# options = list(pageLength = 10),
# caption = 'Table 1: Samples in validation set')#import::from(.from = SummarizedExperiment, colData, assay)
import::from(.from = variancePartition, fitExtractVarPartModel, sortCols, plotVarPart)
import::from(.from = doParallel, registerDoParallel)
import::from(.from = parallel, makeCluster, stopCluster)
# import utils scripts
import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/rnaSelectTopVarGenes.R"), "rnaSelectTopVarGenes", .character_only=TRUE)
import::from(.from = here::here("utils/edaFunctions.R"), "varPartitionEstimate", "generatePCA", "pcaExtractVariance", "pcaPlotVariance", "pcaCorrPCs", "pcaCorrPCsPlot", "generatePCA_plus_shape", .character_only=TRUE)
import::from(.from = here::here("utils/generateResults.R"), "generateResults_upd", .character_only=TRUE) #
import::from(.from = here::here("utils/plotDegResults.R"), "plotVolcano","plotVolcano_repel", .character_only=TRUE)
library(ggrepel)Comparison between Tumor_plus_NK vs Tumor_only - specifically Tumor_plus_NK_timepoint_2 against Tumor_only_timepoint_1. Tumor_plus_WT_NK_timepoint_2 was renamed to Tumor_plus_NK_timepoint_2
Steps:
abs_filt_samples=3
padj_cutoff = 0.05
log2FC_cutoff = 0.58 #(FC=1.5); log2FC=1.0 # (FC=2)
var_expl_needed <- 0.6 # at least 60% variance explained needed
# compare Tumor_only between timepoints!
# condition_tp_subset <- c("Tumor_only_timepoint_0",
# "Tumor_only_timepoint_1",
# "Tumor_only_timepoint_2",
# "Tumor_plus_NK_timepoint_1",
# "Tumor_plus_NK_timepoint_2",
# "Tumor_plus_WT_NK_timepoint_2")
condition_tp_subset <- c("Tumor_only_timepoint_1",
"Tumor_plus_NK_timepoint_2",
"Tumor_plus_NK_timepoint_1",
"Tumor_plus_WT_NK_timepoint_2")
genes_of_interest <- list("Denn2b"="ENSMUSG00000031024",
"Gbp6"="ENSMUSG00000104713",
"H2-K1"="ENSMUSG00000061232",
"Hs3st2"="ENSMUSG00000046321",
"Htra3"="ENSMUSG00000029096",
"Ifi27"="ENSMUSG00000064215",
"Igtp"="ENSMUSG00000078853",
"Irf1"="ENSMUSG00000018899",
"Lgals3bp"="ENSMUSG00000033880",
"Ly6a"="ENSMUSG00000075602",
"Plaat3"="ENSMUSG00000060675",
"Rtp4"="ENSMUSG00000033355",
"Serpina3f"="ENSMUSG00000066363",
"Stat1"="ENSMUSG00000026104")
genes_of_interest_exp9.4_9.7 <- list(
'Ifi44' ="ENSMUSG00000028037",
'Ccl5' ="ENSMUSG00000035042",
'Iigp1' ="ENSMUSG00000054072",
'Serpina3f' ="ENSMUSG00000066363",
'Ly6a' ="ENSMUSG00000075602",
'Plaat3' ="ENSMUSG00000060675",
'Lgals3bp' ="ENSMUSG00000033880",
'Ifi27' ="ENSMUSG00000064215",
'Gbp6' ="ENSMUSG00000104713",
'Rtp4' ="ENSMUSG00000033355",
'Stat1' ="ENSMUSG00000026104",
'Tap1' ="ENSMUSG00000037321",
'H2-K1' ="ENSMUSG00000061232",
'B2m' ="ENSMUSG00000060802",
'Igtp' ="ENSMUSG00000078853",
'Gtf2ird1' ="ENSMUSG00000023079",
'Cdc42ep4' ="ENSMUSG00000041598",
'Hoxb6' ="ENSMUSG00000000690",
'Bfsp2' ="ENSMUSG00000032556",
'Sox6' ="ENSMUSG00000051910",
'Tgm2' ="ENSMUSG00000037820",
'Adprhl1' ="ENSMUSG00000031448")
# additional parameters ----
if(exists("deg_design")) {rm(deg_design)}
deg_design <- as.formula("~ experiment + cell_line_label + condition_tp")
# # raw and filtered dds
# precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData")
# precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
#
# # log2 and vsd transformed
# precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData")
# precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
#
# # check if object loaded if not load
# if(!exists(precomputed_objects_file)){
# load(precomputed_objects_file)
# load(precomputed_transf_objects_file)
# } # preparing dds and vsd objects for the main project comparison
# We use only EXP 9.4 - 9.7. EXP 9.8 is excluded.
# Preparing AB subset.
# defining parameters
deg_name <- "TpNK_tp2_vs_Tonly_tp1_AB"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
dir.create(deg_dir)
precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)
precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)
experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")
# just in case remove any existing subsets
if(exists("dds_subset")) {rm(dds_subset)}
if(exists("dds_subset_filt")) {rm(dds_subset_filt)}
if(exists("vsd_subset")) {rm(vsd_subset)}
if(!file.exists(precomputed_objects_deg_file)){
cat("RNA objects file '", precomputed_objects_deg_filename, "' does not exist in the OUTPUT_DIR...\n")
cat("generating", precomputed_objects_deg_filename, "\n")
dds_subset <- dds_filt[ , dds_filt$condition_tp %in% condition_tp_subset]
dds_subset <- dds_subset[ , dds_subset$experiment %in% experiment_subset]
dds_subset <- dds_subset[ , dds_subset$cell_line_label %in% c("A", "B")]
# fixing Tumor_plus_WT_NK_timepoint_2 -> Tumor_plus_NK_timepoint_2
dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
table(dds_subset$condition_tp)
dds_subset$experiment <- droplevels(dds_subset$experiment)
table(dds_subset$experiment)
dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
table(dds_subset$cell_line_label)
# filtering lowly expressed genes
dds_subset_filt <- filterDatasets(dds_subset,
abs_filt = TRUE,
abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
cat("... dds\n")
design(dds_subset_filt) <- deg_design
dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
cat("saving...", precomputed_objects_file, "\n")
cat("...into:", output_dir,deg_name,"\n")
save(dds_subset, dds_subset_filt, ensemblAnnot, file = precomputed_objects_deg_file)
} else{
cat("RNA objects file '", precomputed_objects_deg_filename, "' exist...loading\n")
load(precomputed_objects_deg_file)
}## RNA objects file ' TpNK_tp2_vs_Tonly_tp1_AB_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_deg_file)){
# transformation
log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info; blind = TRUE for QC
#rld_filt <- DESeq2::rlog(dds_subset_filt, blind = FALSE) # more robust to differences in sequencing depth!
#rld_filt - too big for >100 samples; skipping!
#rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
save(log2_norm_subset_filt, vsd_subset_filt,
file = precomputed_transf_objects_deg_file)
} else{
load(precomputed_transf_objects_deg_file)
}#get the counts table for diffTFs AB cell lines
fl_smpl_nm <- gsub("_Day\\d\\d","",dds_subset@colData@listData[["full_sample_name"]])
fl_smpl_nm <- gsub("_\\d_","_",fl_smpl_nm)
sample_name <- paste0(dds_subset@colData@listData[["original_experiment"]], "_",
fl_smpl_nm, "_",
dds_subset@colData@listData[["day_cell_harvesting"]], "_REP",
dds_subset@colData@listData[["technical_replicate"]])
sample_name <- gsub("^EXP\\s9.", "EXP9_", sample_name)
sample_name <- gsub("\\+", "_plus_", sample_name)
sample_name <- gsub("_Tumor_only_Day\\d_", "_Tumor_only_", sample_name)
#These samples from RNA-seq are missing in ATAC data.
#EXP9_6_13G_Tumor_plus_NK_Day14_REP1 is missing in ./
#EXP9_6_13G_Tumor_plus_NK_Day14_REP2 is missing in ./
#EXP9_6_13G_Tumor_plus_NK_Day14_REP3 is missing in ./
#EXP9_6_13H_Tumor_plus_NK_Day14_REP1 is missing in ./
#EXP9_6_13H_Tumor_plus_NK_Day14_REP2 is missing in ./
#EXP9_6_13H_Tumor_plus_NK_Day14_REP3 is missing in ./
#
#Instead there is Day_10 version.
#
#I'm going to replace in the RNA-seq count table the day 14 with day 10, so that samples match.
sample_name <- gsub("EXP9_6_13G_Tumor_plus_NK_Day14", "EXP9_6_13G_Tumor_plus_NK_Day10", sample_name)
sample_name <- gsub("EXP9_6_13H_Tumor_plus_NK_Day14", "EXP9_6_13H_Tumor_plus_NK_Day10", sample_name)
col_IDS <- c("ENSEMBL", sample_name)
RNA_seq_raw_counts <- dds_subset@assays@data@listData[["counts"]]
RNA_seq_raw_counts <- cbind(row.names(RNA_seq_raw_counts), RNA_seq_raw_counts)
RNA_seq_raw_counts <- rbind(col_IDS,RNA_seq_raw_counts)
write.table(RNA_seq_raw_counts, file = "~/workspace/RNA_seq_raw_counts_AB.tsv",
sep ="\t", quote = FALSE, row.names = FALSE, col.names = FALSE)
# Building the sample table
sampleID_list <- list("sampleID" = sample_name)
bamReads_list <- list("bamReads" = paste0("/home/aleksandr_b/bioinf_isilon/core_bioinformatics_unit/Internal/aleksandr/projects/ab3_20230220_nk_tumor_project/data_ATACSeq_nfcore_run/results/bwa/merged_library/", sample_name, ".mLb.clN.sorted.bam"))
cond_summ <- sample_name
cond_summ[stringr::str_detect(cond_summ, pattern = "_Tumor_only_")] <- "Tumor_only_TP_1"
cond_summ[stringr::str_detect(cond_summ, pattern = "_Tumor_plus_NK")] <- "Tumor_pls_NK_TP2"
conditionSummary_list <- list("conditionSummary" = cond_summ)
experiment_id_list <- list("experiment_id" = stringr::str_extract(sample_name, "EXP\\d_\\d"))
sampleData.tsv <- data.frame(sampleID_list,
bamReads_list,
conditionSummary_list,
experiment_id_list)
write.table(sampleData.tsv, file = "~/workspace/sampleData.tsv",
sep ="\t", quote = FALSE, row.names = FALSE)#print(key_variables_tableOne$CatTable)
rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previus runs
key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate)
key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)),
strata = c("condition_tp"),
data = key_metadata_subset)
tableone::kableone(key_variables_tableOne$CatTable,
caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
kableExtra::kable_material(c("striped", "hover"))| Tumor_only_timepoint_1 | Tumor_plus_NK_timepoint_1 | Tumor_plus_NK_timepoint_2 | p | test | |
|---|---|---|---|---|---|
| n | 18 | 18 | 18 | ||
| experiment (%) | 1.000 | ||||
| EXP9.5 | 6 (33.3) | 6 (33.3) | 6 (33.3) | ||
| EXP9.6 | 6 (33.3) | 6 (33.3) | 6 (33.3) | ||
| EXP9.7 | 6 (33.3) | 6 (33.3) | 6 (33.3) | ||
| cell_line_label = B (%) | 9 (50.0) | 9 (50.0) | 9 (50.0) | 1.000 | |
| technical_replicate (%) | 1.000 | ||||
| 1 | 6 (33.3) | 6 (33.3) | 6 (33.3) | ||
| 2 | 6 (33.3) | 6 (33.3) | 6 (33.3) | ||
| 3 | 6 (33.3) | 6 (33.3) | 6 (33.3) |
#TODO:
# - plot images after removing batch effects!
# EDA and visualize plots before and after batch effect correction
# transf_object <- vsd_subset_filt
# pca_deg <- generatePCA(transf_object = transf_object,
# cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
# color_variable = "condition_tp",
# shape_variable = "experiment",
# ntop_genes = 1000) +
# ggtitle("Original dataset") +
# geom_text_repel(aes(label = transf_object$sample_name),
# box.padding = 3)
#
#
#
# pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object,
# cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
# color_variable = "condition_tp",
# shape_variable = "cell_line_label",
# ntop_genes = 1000) +
# ggtitle("Original dataset")
#
# # Remove batch effect
# transf_batch_NObatch_experiment <- vsd_subset_filt
# transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
# SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
#
# pca_deg_NObatch_experiment <- generatePCA(transf_object = transf_batch_NObatch_experiment,
# cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
# color_variable = "condition_tp",
# shape_variable = "experiment",
# ntop_genes = 1000) +
# ggtitle("Removed batchEffect - experiment")
#
# pca_deg_NObatch_experiment_plotCellLabel <- generatePCA(transf_object = transf_batch_NObatch_experiment,
# cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
# color_variable = "condition_tp",
# shape_variable = "cell_line_label",
# ntop_genes = 1000) +
# ggtitle("Removed batchEffect - experiment")
#
# #PCA_batch_merged <- ggpubr::ggarrange(pca_deg, pca_deg_NObatch_experiment, common.legend = TRUE)
#
#
# PCA_batch_merged <- ggpubr::ggarrange(pca_deg,
# pca_deg_NObatch_experiment,
# common.legend = TRUE)
#
# PCA_batch_highlightCellLines_merged <- ggpubr::ggarrange(pca_deg_plotCellLabel,
# pca_deg_NObatch_experiment_plotCellLabel,
# common.legend = TRUE)
###### Plots for publication #######
transf_object <- vsd_subset_filt
pca_deg <- generatePCA_plus_shape(transf_object = transf_object,
cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"),
color_variable = "condition_tp",
shape_variable = "Cell_line_label_and_exp_id",
ntop_genes = 1000)+
ggtitle("Original dataset") +
scale_shape_manual(values = c(0,1,2,15,16,17)) +
scale_color_manual(values = c("#DD3344", "#FF9F1C" ,"#553388"))
pca_deg# remove batch effect
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
pca_deg_NObatch_experiment <- generatePCA_plus_shape(transf_object = transf_batch_NObatch_experiment,
cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"),
color_variable = "condition_tp",
shape_variable = "Cell_line_label_and_exp_id",
ntop_genes = 1000)+
ggtitle("Batch Corrected Dataset") +
scale_shape_manual(values = c(0,1,2,15,16,17)) +
scale_color_manual(values = c("#DD3344", "#FF9F1C" ,"#553388"))
pca_deg_NObatch_experimentggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_not_batch_corrected.png"),
plot=pca_deg,
width = 20, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_batch_corrected.png"),
plot=pca_deg_NObatch_experiment,
width = 20, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_not_batch_corrected.eps"),
plot=pca_deg,
width = 20, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_PCA_batch_corrected.eps"),
plot=pca_deg_NObatch_experiment,
width = 20, height = 14, units = "cm")The following PCA plots show effect of removing batch effects (experiment). There are two weird samples from EXP9.7 that create a lot of variance. What should i do with them?
print(pca_deg_NObatch_experiment)print(pca_deg)#resultsNames(dds_subset_filt)
# timepoint2 (T+NK)_vs_timepoint1 (T)
deg_TpNK_tp2_vs_Tonly_tp1_results <- generateResults_upd(dds_object = dds_subset_filt,
coeff_name = "condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1",
cond_numerator = "Tumor_plus_NK_timepoint_2",
cond_denominator = "Tumor_only_timepoint_1",
cond_variable="condition_tp")
openxlsx::write.xlsx(deg_TpNK_tp2_vs_Tonly_tp1_results, file = file.path(deg_dir, "deg_TpNK_tp2_vs_Tonly_tp1_AB_results.xlsx"))
AB_results <- deg_TpNK_tp2_vs_Tonly_tp1_results# Loading MsigDB geneset collections ----
gs_hallmark <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("H"), clean=TRUE)
gs_C2_kegg <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C2"), subcategory = "CP:KEGG", clean=TRUE)
gs_C5_GOBP <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:BP", clean=TRUE)
gs_C5_GOCC <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:CC", clean=TRUE)
gs_C5_GOMF <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:MF", clean=TRUE)
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol,
genesets = gs_C5_GOBP,
test="hypergeometric",
background=nrow(dds_subset_filt))
AB_GOBP <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol,
genesets = gs_C5_GOCC,
test="hypergeometric",
background=nrow(dds_subset_filt))
AB_GOCC <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol,
genesets = gs_C5_GOMF,
test="hypergeometric",
background=nrow(dds_subset_filt))
AB_GOMF <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF
#hypeR::hyp_show(hyp_obj$data$clA, simple = FALSE)
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_enrichment_C5_GOBP.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_enrichment_C5_GOCC.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_enrichment_C5_GOMF.xlsx"))
# plotting enrichment results ----
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOBP: T+NK timepoint2 vs T-only timepoint1 AB") +theme_bw()
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOCC: T+NK timepoint2 vs T-only timepoint1 AB") +theme_bw()
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOMF:T+NK timepoint2 vs T-only timepoint1 AB") + theme_bw()
###### For publication ######
ggsave(filename = paste0(deg_dir,"publication/" ,"TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.png"),
plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/" ,"TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.eps"),
plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
width = 20, height = 20, units = "cm")Volcano plot highlighting genes of interest:
# volcano plot
# heatmap - corrected
genes_of_interest## $Denn2b
## [1] "ENSMUSG00000031024"
##
## $Gbp6
## [1] "ENSMUSG00000104713"
##
## $`H2-K1`
## [1] "ENSMUSG00000061232"
##
## $Hs3st2
## [1] "ENSMUSG00000046321"
##
## $Htra3
## [1] "ENSMUSG00000029096"
##
## $Ifi27
## [1] "ENSMUSG00000064215"
##
## $Igtp
## [1] "ENSMUSG00000078853"
##
## $Irf1
## [1] "ENSMUSG00000018899"
##
## $Lgals3bp
## [1] "ENSMUSG00000033880"
##
## $Ly6a
## [1] "ENSMUSG00000075602"
##
## $Plaat3
## [1] "ENSMUSG00000060675"
##
## $Rtp4
## [1] "ENSMUSG00000033355"
##
## $Serpina3f
## [1] "ENSMUSG00000066363"
##
## $Stat1
## [1] "ENSMUSG00000026104"
genes_of_interest_exp9.4_9.7## $Ifi44
## [1] "ENSMUSG00000028037"
##
## $Ccl5
## [1] "ENSMUSG00000035042"
##
## $Iigp1
## [1] "ENSMUSG00000054072"
##
## $Serpina3f
## [1] "ENSMUSG00000066363"
##
## $Ly6a
## [1] "ENSMUSG00000075602"
##
## $Plaat3
## [1] "ENSMUSG00000060675"
##
## $Lgals3bp
## [1] "ENSMUSG00000033880"
##
## $Ifi27
## [1] "ENSMUSG00000064215"
##
## $Gbp6
## [1] "ENSMUSG00000104713"
##
## $Rtp4
## [1] "ENSMUSG00000033355"
##
## $Stat1
## [1] "ENSMUSG00000026104"
##
## $Tap1
## [1] "ENSMUSG00000037321"
##
## $`H2-K1`
## [1] "ENSMUSG00000061232"
##
## $B2m
## [1] "ENSMUSG00000060802"
##
## $Igtp
## [1] "ENSMUSG00000078853"
##
## $Gtf2ird1
## [1] "ENSMUSG00000023079"
##
## $Cdc42ep4
## [1] "ENSMUSG00000041598"
##
## $Hoxb6
## [1] "ENSMUSG00000000690"
##
## $Bfsp2
## [1] "ENSMUSG00000032556"
##
## $Sox6
## [1] "ENSMUSG00000051910"
##
## $Tgm2
## [1] "ENSMUSG00000037820"
##
## $Adprhl1
## [1] "ENSMUSG00000031448"
TpNK_tp2_vs_Tonly_tp1_volcano_plot_1 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all,
genes_of_interest = names(genes_of_interest),
plot_title = "T+NK tp2 vs T-only tp1 AB genes set 1")
TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all,
genes_of_interest = names(genes_of_interest_exp9.4_9.7),
plot_title = "T+NK tp2 vs T-only tp1 AB genes set 2")
##### For publication #####
TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano_repel(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all,
genes_of_interest = names(genes_of_interest_exp9.4_9.7),
plot_title = "T+NK tp2 vs T-only tp1 AB")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_volcano_plot.png"),
plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
width = 18, height = 12, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_AB_volcano_plot.eps"),
plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
width = 18, height = 12, units = "cm")print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_1)print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_2)Heatmap of normalized and scaled expression of 607 significantly differentially expressed genes before and after removeal of batch effects coming from different experiments:
# heatmap and heatmap batch corrected - corrected
metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)
heatmap_not_corrected <- pheatmap::pheatmap(heatmap_counts_deg_ord,
main = "Heatmap of signif. DEG",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) # ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_not_corrected.png"),
plot=heatmap_not_corrected,
width = 20, height = 20, units = "cm")# heatmap and heatmap batch corrected - corrected
metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG - Batch Corrected",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) # ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected.png"),
plot=heatmap_corrected,
width = 20, height = 20, units = "cm")
##### For publication #####
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected.eps"),
plot=heatmap_corrected,
width = 18, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected.png"),
plot=heatmap_corrected,
width = 18, height = 20, units = "cm")# identify the 25 gene set
minL2FC_Ids <- order(deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
maxL2FC_Ids <- order(decreasing = T, deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
L2FC_EMSEMBL <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id[c(minL2FC_Ids, maxL2FC_Ids)]
L2FC_EMSEMBL <- c(L2FC_EMSEMBL, "ENSMUSG00000075602", "ENSMUSG00000060675")
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% L2FC_EMSEMBL, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
rownames(heatmap_counts_NObatch_deg_ord) <- ensembl2symbol_annot[match(x = row.names(heatmap_counts_NObatch_deg_ord), table = ensembl2symbol_annot$ensembl_id),]$mgi_symbol
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG - Batch Corrected 50 most regulated genes",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = T,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) # <img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 AB”small heatmap”-1.png” width=“672” />
##### For publication #####
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected_50genes.eps"),
plot=heatmap_corrected,
width = 24, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_heatmap_corrected_50_genes.png"),
plot=heatmap_corrected,
width = 24, height = 20, units = "cm")# preparing dds and vsd objects for the main project comparison
# We use only EXP 9.4 - 9.7. EXP 9.8 is excluded.
# Preparing CD subset.
# defining parameters
deg_name <- "TpNK_tp2_vs_Tonly_tp1_CD"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
dir.create(deg_dir)
precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)
precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)
experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")
# just in case remove any existing subsets
if(exists("dds_subset")) {rm(dds_subset)}
if(exists("dds_subset_filt")) {rm(dds_subset_filt)}
if(exists("vsd_subset")) {rm(vsd_subset)}
if(!file.exists(precomputed_objects_deg_file)){
cat("RNA objects file '", precomputed_objects_deg_filename, "' does not exist in the OUTPUT_DIR...\n")
cat("generating", precomputed_objects_deg_filename, "\n")
dds_subset <- dds_filt[ , dds_filt$condition_tp %in% condition_tp_subset]
dds_subset <- dds_subset[ , dds_subset$experiment %in% experiment_subset]
dds_subset <- dds_subset[ , dds_subset$cell_line_label %in% c("C", "D")]
# fixing Tumor_plus_WT_NK_timepoint_2 -> Tumor_plus_NK_timepoint_2
dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
table(dds_subset$condition_tp)
dds_subset$experiment <- droplevels(dds_subset$experiment)
table(dds_subset$experiment)
dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
table(dds_subset$cell_line_label)
# filtering lowly expressed genes
dds_subset_filt <- filterDatasets(dds_subset,
abs_filt = TRUE,
abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
cat("... dds\n")
design(dds_subset_filt) <- deg_design
dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
cat("saving...", precomputed_objects_file, "\n")
cat("...into:", output_dir, "\n")
save(dds_subset, dds_subset_filt, ensemblAnnot, file = precomputed_objects_deg_file)
} else{
cat("RNA objects file '", precomputed_objects_deg_filename, "' exist...loading\n")
load(precomputed_objects_deg_file)
}## RNA objects file ' TpNK_tp2_vs_Tonly_tp1_CD_dds_objects.RData ' exist...loading
#transformation after filtering
if(!file.exists(precomputed_transf_objects_deg_file)){
# transformation
log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info; blind = TRUE for QC
#rld_filt <- DESeq2::rlog(dds_subset_filt, blind = FALSE) # more robust to differences in sequencing depth!
#rld_filt - too big for >100 samples; skipping!
#rld_filt <- rlog(dds_filt, blind = FALSE) # not blind to batch effects
save(log2_norm_subset_filt, vsd_subset_filt,
file = precomputed_transf_objects_deg_file)
} else{
load(precomputed_transf_objects_deg_file)
}#print(key_variables_tableOne$CatTable)
rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previus runs
key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate)
key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)),
strata = c("condition_tp"),
data = key_metadata_subset)
tableone::kableone(key_variables_tableOne$CatTable,
caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
kableExtra::kable_material(c("striped", "hover"))| Tumor_only_timepoint_1 | Tumor_plus_NK_timepoint_1 | Tumor_plus_NK_timepoint_2 | p | test | |
|---|---|---|---|---|---|
| n | 24 | 24 | 23 | ||
| experiment (%) | 1.000 | ||||
| EXP9.4 | 6 (25.0) | 6 (25.0) | 6 (26.1) | ||
| EXP9.5 | 6 (25.0) | 6 (25.0) | 6 (26.1) | ||
| EXP9.6 | 6 (25.0) | 6 (25.0) | 5 (21.7) | ||
| EXP9.7 | 6 (25.0) | 6 (25.0) | 6 (26.1) | ||
| cell_line_label = D (%) | 12 (50.0) | 12 (50.0) | 11 (47.8) | 0.985 | |
| technical_replicate (%) | 1.000 | ||||
| 1 | 8 (33.3) | 8 (33.3) | 8 (34.8) | ||
| 2 | 8 (33.3) | 8 (33.3) | 8 (34.8) | ||
| 3 | 8 (33.3) | 8 (33.3) | 7 (30.4) |
#TODO:
# - plot images after removing batch effects!
# EDA and visualize plots before and after batch effect correction
transf_object <- vsd_subset_filt
pca_deg <- generatePCA(transf_object = transf_object,
cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
color_variable = "condition_tp",
shape_variable = "experiment",
ntop_genes = 1000) +
ggtitle("Original dataset")
pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object,
cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000) +
ggtitle("Original dataset")
# Remove batch effect
# remove batch effect
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
pca_deg_NObatch_experiment <- generatePCA(transf_object = transf_batch_NObatch_experiment,
cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
color_variable = "condition_tp",
shape_variable = "experiment",
ntop_genes = 1000) +
ggtitle("Removed batchEffect - experiment")
pca_deg_NObatch_experiment_plotCellLabel <- generatePCA(transf_object = transf_batch_NObatch_experiment,
cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000) +
ggtitle("Removed batchEffect - experiment")
#PCA_batch_merged <- ggpubr::ggarrange(pca_deg, pca_deg_NObatch_experiment, common.legend = TRUE)
PCA_batch_merged <- ggpubr::ggarrange(pca_deg,
pca_deg_NObatch_experiment,
common.legend = TRUE)
PCA_batch_highlightCellLines_merged <- ggpubr::ggarrange(pca_deg_plotCellLabel,
pca_deg_NObatch_experiment_plotCellLabel,
common.legend = TRUE)
#### for publication #####
transf_object <- vsd_subset_filt
pca_deg <- generatePCA_plus_shape(transf_object = transf_object,
cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"),
color_variable = "condition_tp",
shape_variable = "Cell_line_label_and_exp_id",
ntop_genes = 1000)+
ggtitle("Original dataset") +
scale_shape_manual(values = c(0,1,2,5,15,16,17,18)) +
scale_color_manual(values = c("#DD3344", "#FF9F1C", "#553388"))
pca_deg# remove batch effect
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
pca_deg_NObatch_experiment <- generatePCA_plus_shape(transf_object = transf_batch_NObatch_experiment,
cond_interest_varPart = c("cell_line_label", "experiment","condition_tp"),
color_variable = "condition_tp",
shape_variable = "Cell_line_label_and_exp_id",
ntop_genes = 1000)+
ggtitle("Batch Corrected Dataset") +
scale_shape_manual(values = c(0,1,2,5,15,16,17,18)) +
scale_color_manual(values = c("#DD3344", "#FF9F1C", "#553388"))
pca_deg_NObatch_experimentggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_not_batch_corrected.png"),
plot=pca_deg,
width = 20, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_batch_corrected.png"),
plot=pca_deg_NObatch_experiment,
width = 20, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_not_batch_corrected.eps"),
plot=pca_deg,
width = 20, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_PCA_batch_corrected.eps"),
plot=pca_deg_NObatch_experiment,
width = 20, height = 14, units = "cm")#resultsNames(dds_subset_filt)
# timepoint2 (T+NK)_vs_timepoint1 (T)
deg_TpNK_tp2_vs_Tonly_tp1_results <- generateResults_upd(dds_object = dds_subset_filt,
coeff_name = "condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1",
cond_numerator = "Tumor_plus_NK_timepoint_2",
cond_denominator = "Tumor_only_timepoint_1",
cond_variable="condition_tp")
CD_results <- deg_TpNK_tp2_vs_Tonly_tp1_results
openxlsx::write.xlsx(deg_TpNK_tp2_vs_Tonly_tp1_results, file = file.path(deg_dir, "deg_TpNK_tp2_vs_Tonly_tp1_CD_results.xlsx"))#msigdbr::msigdbr_species()
#packageVersion("msigdbr")
#msigdbr::msigdbr_collections()
# Loading MsigDB geneset collections ----
gs_hallmark <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("H"), clean=TRUE)
gs_C2_kegg <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C2"), subcategory = "CP:KEGG", clean=TRUE)
gs_C5_GOBP <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:BP", clean=TRUE)
gs_C5_GOCC <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:CC", clean=TRUE)
gs_C5_GOMF <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:MF", clean=TRUE)
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol,
genesets = gs_C5_GOBP,
test="hypergeometric",
background=nrow(dds_subset_filt))
CD_GOBP <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol,
genesets = gs_C5_GOCC,
test="hypergeometric",
background=nrow(dds_subset_filt))
CD_GOCC <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF <- hypeR::hypeR(signature = deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$mgi_symbol,
genesets = gs_C5_GOMF,
test="hypergeometric",
background=nrow(dds_subset_filt))
CD_GOMF <- TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF
#hypeR::hyp_show(hyp_obj$data$clA, simple = FALSE)
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_enrichment_C5_GOBP.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_enrichment_C5_GOCC.xlsx"))
hypeR::hyp_to_excel(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, file_path=file.path(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_enrichment_C5_GOMF.xlsx"))
# plotting enrichment results ----
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOBP: T+NK timepoint2 vs T-only timepoint1 CD")+theme_bw()
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOCC: T+NK timepoint2 vs T-only timepoint1 CD")+theme_bw()
TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF_plot <- hypeR::hyp_dots(TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title="GOMF:T+NK timepoint2 vs T-only timepoint1 CD")+theme_bw()
ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.png"),
plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOCC_plot.png"),
plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOCC_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOMF_plot.png"),
plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOMF_plot,
width = 20, height = 20, units = "cm")
###### For publication ######
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.png"),
plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_AB_hyp_GOBP_plot.eps"),
plot=TpNK_tp2_vs_Tonly_tp1_enrichment_C5_GOBP_plot,
width = 20, height = 20, units = "cm")Volcano plot highlighting genes of interest:
# volcano plot
# heatmap - corrected
genes_of_interest## $Denn2b
## [1] "ENSMUSG00000031024"
##
## $Gbp6
## [1] "ENSMUSG00000104713"
##
## $`H2-K1`
## [1] "ENSMUSG00000061232"
##
## $Hs3st2
## [1] "ENSMUSG00000046321"
##
## $Htra3
## [1] "ENSMUSG00000029096"
##
## $Ifi27
## [1] "ENSMUSG00000064215"
##
## $Igtp
## [1] "ENSMUSG00000078853"
##
## $Irf1
## [1] "ENSMUSG00000018899"
##
## $Lgals3bp
## [1] "ENSMUSG00000033880"
##
## $Ly6a
## [1] "ENSMUSG00000075602"
##
## $Plaat3
## [1] "ENSMUSG00000060675"
##
## $Rtp4
## [1] "ENSMUSG00000033355"
##
## $Serpina3f
## [1] "ENSMUSG00000066363"
##
## $Stat1
## [1] "ENSMUSG00000026104"
genes_of_interest_exp9.4_9.7## $Ifi44
## [1] "ENSMUSG00000028037"
##
## $Ccl5
## [1] "ENSMUSG00000035042"
##
## $Iigp1
## [1] "ENSMUSG00000054072"
##
## $Serpina3f
## [1] "ENSMUSG00000066363"
##
## $Ly6a
## [1] "ENSMUSG00000075602"
##
## $Plaat3
## [1] "ENSMUSG00000060675"
##
## $Lgals3bp
## [1] "ENSMUSG00000033880"
##
## $Ifi27
## [1] "ENSMUSG00000064215"
##
## $Gbp6
## [1] "ENSMUSG00000104713"
##
## $Rtp4
## [1] "ENSMUSG00000033355"
##
## $Stat1
## [1] "ENSMUSG00000026104"
##
## $Tap1
## [1] "ENSMUSG00000037321"
##
## $`H2-K1`
## [1] "ENSMUSG00000061232"
##
## $B2m
## [1] "ENSMUSG00000060802"
##
## $Igtp
## [1] "ENSMUSG00000078853"
##
## $Gtf2ird1
## [1] "ENSMUSG00000023079"
##
## $Cdc42ep4
## [1] "ENSMUSG00000041598"
##
## $Hoxb6
## [1] "ENSMUSG00000000690"
##
## $Bfsp2
## [1] "ENSMUSG00000032556"
##
## $Sox6
## [1] "ENSMUSG00000051910"
##
## $Tgm2
## [1] "ENSMUSG00000037820"
##
## $Adprhl1
## [1] "ENSMUSG00000031448"
TpNK_tp2_vs_Tonly_tp1_volcano_plot_1 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all,
genes_of_interest = names(genes_of_interest),
plot_title = "T+NK tp2 vs T-only tp1 CD genes set 1")
TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all,
genes_of_interest = names(genes_of_interest_exp9.4_9.7),
plot_title = "T+NK tp2 vs T-only tp1 CD genes set 2")
ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot_gene_set_1.png"),
plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_1,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot_gene_set_2.png"),
plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
width = 20, height = 20, units = "cm")
##### for publication ######
TpNK_tp2_vs_Tonly_tp1_volcano_plot_2 <- plotVolcano_repel(dds_results_obj = deg_TpNK_tp2_vs_Tonly_tp1_results$results_all,
genes_of_interest = names(genes_of_interest_exp9.4_9.7),
plot_title = "T+NK tp2 vs T-only tp1 CD")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot.png"),
plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
width = 18, height = 12, units = "cm")
ggsave(filename = paste0(deg_dir,"publication/", "TpNK_tp2_vs_Tonly_tp1_CD_volcano_plot.eps"),
plot=TpNK_tp2_vs_Tonly_tp1_volcano_plot_2,
width = 18, height = 12, units = "cm")print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_1)print(TpNK_tp2_vs_Tonly_tp1_volcano_plot_2)Heatmap of normalized and scaled expression of 321 significantly differentially expressed genes before and after removeal of batch effects coming from different experiments:
# heatmap and heatmap batch corrected - corrected
metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c(C = "#E9D8A6", D = "#D9BE6D")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) # ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_batch_corrected.png"),
plot=heatmap_corrected,
width = 20, height = 20, units = "cm")
###### For publication #####
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_bathc_corrected.png"),
plot=heatmap_corrected,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_batch_corrected.eps"),
plot=heatmap_corrected,
width = 20, height = 20, units = "cm")# identify the 25 gene set
minL2FC_Ids <- order(deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
maxL2FC_Ids <- order(decreasing = T, deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$log2FoldChange)[1:25]
L2FC_EMSEMBL <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id[c(minL2FC_Ids, maxL2FC_Ids)]
L2FC_EMSEMBL <- c(L2FC_EMSEMBL, "ENSMUSG00000075602", "ENSMUSG00000060675")
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% L2FC_EMSEMBL, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
rownames(heatmap_counts_NObatch_deg_ord) <- ensembl2symbol_annot[match(x = row.names(heatmap_counts_NObatch_deg_ord), table = ensembl2symbol_annot$ensembl_id),]$mgi_symbol
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c(C = "#E9D8A6", D = "#D9BE6D")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG - Batch Corrected 50 most regulated genes",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = T,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) # <img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 CD”small heatmap”-1.png” width=“672” />
##### For publication #####
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_corrected_50_genes.eps"),
plot=heatmap_corrected,
width = 24, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_corrected_50_genes.png"),
plot=heatmap_corrected,
width = 24, height = 20, units = "cm")# heatmap and heatmap batch corrected - corrected
metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c(EXP9.4 = "#4682B4", EXP9.5 = "#7846B4", EXP9.6 = "#B47846", EXP9.7 = "#82B446", EXP9.8 = "grey")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG - removed experiment BatchEffect",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) # ggsave(filename = paste0(deg_dir, "TpNK_tp2_vs_Tonly_tp1_CD_heatmap_corrected.png"),
plot=heatmap_corrected,
width = 20, height = 20, units = "cm")AB_CD_UP_venn <- list("AB_up" = AB_results$results_signif[AB_results$results_signif$log2FoldChange > 0,]$mgi_symbol,
"CD_up" = CD_results$results_signif[CD_results$results_signif$log2FoldChange > 0,]$mgi_symbol)
AB_CD_DOWN_venn <- list("AB_down" = AB_results$results_signif[AB_results$results_signif$log2FoldChange < 0,]$mgi_symbol,
"CD_down" = CD_results$results_signif[CD_results$results_signif$log2FoldChange < 0,]$mgi_symbol)
library(ggvenn)
AB_CD_UP_venn_plot <- ggvenn(
AB_CD_UP_venn,
fill_color = c("#E0E2E3", "#8EA7CE"),
stroke_size = 0.5, set_name_size = 4
)
AB_CD_DOWN_venn_plot <- ggvenn(
AB_CD_DOWN_venn,
fill_color = c("#E0E2E3", "#8EA7CE"),
stroke_size = 0.5, set_name_size = 4
)
deg_name <- "TpNK_tp2_AB_CD_comparison"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
intersect(AB_results$results_signif[AB_results$results_signif$log2FoldChange < 0,]$mgi_symbol,
CD_results$results_signif[CD_results$results_signif$log2FoldChange < 0,]$mgi_symbol)## [1] "Bfsp2" "Hspb8" "Ckap4" "Denn2b" "Hs3st2" "Mgll" "Cdc42ep4" "Egfl7"
## [9] "Akr1c13" "Apbb1" "Pygl" "Tcf7l1" "Gria2" "Tmem176a" "Golm1" "Ntrk3"
## [17] "Tmem255a" "Tmem38b" "Gm36120" "Arid5b" "n-R5s118" "H2ac15" "Ramp2" "Tmed6"
## [25] "Rgs7bp" "Ccdc120" "Slc47a1"
ggsave(filename = paste0(deg_dir, "AB_CD_UP_venn_plot.png"),
plot=AB_CD_UP_venn_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "AB_CD_UP_venn_plot.eps"),
plot=AB_CD_UP_venn_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "AB_CD_DOWN_venn_plot.png"),
plot=AB_CD_DOWN_venn_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, "AB_CD_DOWN_venn_plot.eps"),
plot=AB_CD_DOWN_venn_plot,
width = 20, height = 20, units = "cm")
#aggreagated excel sheet for
Deg_AB_xlsx <- openxlsx::read.xlsx("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_AB/deg_TpNK_tp2_vs_Tonly_tp1_AB_results.xlsx",sheet = 'results_signif')
Deg_CD_xlsx <- openxlsx::read.xlsx("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_CD/deg_TpNK_tp2_vs_Tonly_tp1_CD_results.xlsx",sheet = 'results_signif')
AB_specific <- Deg_AB_xlsx[Deg_AB_xlsx$mgi_symbol %in% setdiff(Deg_AB_xlsx$mgi_symbol, Deg_CD_xlsx$mgi_symbol),]%>%
select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj")
CD_specific <- Deg_CD_xlsx[Deg_CD_xlsx$mgi_symbol %in% setdiff(Deg_CD_xlsx$mgi_symbol, Deg_AB_xlsx$mgi_symbol),]%>%
select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj")
AB_intersect_tmp <- Deg_AB_xlsx[which(Deg_AB_xlsx$mgi_symbol %in% intersect(Deg_AB_xlsx$mgi_symbol, Deg_CD_xlsx$mgi_symbol)), ] %>%
select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj") %>%
rename("AB_log2FoldChange" = "log2FoldChange", "AB_padj" = "padj")
CD_intersect_tmp <- Deg_CD_xlsx[which(Deg_CD_xlsx$mgi_symbol %in% intersect(Deg_AB_xlsx$mgi_symbol, Deg_CD_xlsx$mgi_symbol)), ] %>%
select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj") %>%
rename("CD_log2FoldChange" = "log2FoldChange", "CD_padj" = "padj")
CD_intersect_tmp <- CD_intersect_tmp[match(AB_intersect_tmp$mgi_symbol, CD_intersect_tmp$mgi_symbol),]
AB_CD_intersect <- cbind(AB_intersect_tmp, select(CD_intersect_tmp, "CD_log2FoldChange", "CD_padj"))
AB_CD_ABCD_diff_intersect <- list("AB_specific" = AB_specific,
"CD_specific" = CD_specific,
"AB_CD_intersect" = AB_CD_intersect)
openxlsx::write.xlsx(AB_CD_ABCD_diff_intersect,
"~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_AB_CD_comparison/AB_CD_diff_and_interactions.xlsx")Make a small heatmap with genes, overlaping from AB and CD.
deg_name <- "TpNK_tp2_AB_CD_comparison"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
dir.create(deg_dir)
precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)
precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)
experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")
AB_CD_intersect_genes <- intersect(AB_results$results_signif$mgi_symbol, CD_results$results_signif$mgi_symbol)
AB_res_subset <- AB_results$results_signif[AB_results$results_signif$mgi_symbol %in% AB_CD_intersect_genes,]
#CD_res_subset <- CD_results$results_signif[CD_results$results_signif$mgi_symbol %in% AB_CD_intersect_genes,]
AB_res_subset_UP <- AB_res_subset[AB_res_subset$log2FoldChange > 0,]
AB_res_subset_DOWN <- AB_res_subset[AB_res_subset$log2FoldChange < 0,]
#AB_res_subset_UP <- AB_res_subset_UP %>% arrange(desc(log2FoldChange)) %>% slice_head(n=20)
#AB_res_subset_DOWN <- AB_res_subset_DOWN %>% arrange(log2FoldChange) %>% slice_head(n=10)
#OR padj
AB_res_subset_UP <- AB_res_subset_UP %>% arrange(padj) %>% slice_head( n = 20)
AB_res_subset_DOWN <- AB_res_subset_DOWN %>% arrange(desc(padj)) %>% slice_head(n=10)
geneset <- c(AB_res_subset_UP$mgi_symbol, AB_res_subset_DOWN$mgi_symbol)
deg_name <- "TpNK_tp2_vs_Tonly_tp1_AB"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)
precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)
load(precomputed_objects_deg_file)
load(precomputed_transf_objects_deg_file)
transf_object <- vsd_subset_filt
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
ensembl_gene_set <- ensembl2symbol_annot[which(ensembl2symbol_annot$mgi_symbol %in% geneset),]
ensembl_gene_set <- ensembl_gene_set[ match(geneset , ensembl_gene_set$mgi_symbol),]
indexes <- which(row.names(heatmap_counts_NObatch_deg_ord) %in% ensembl_gene_set$ensembl_id)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[indexes,]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[ match(ensembl_gene_set$ensembl_id , row.names(heatmap_counts_NObatch_deg_ord)),]
rownames(heatmap_counts_NObatch_deg_ord) <- ensembl_gene_set$mgi_symbol
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c( EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c(A = "#E9D8A6", B = "#D9BE6D")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG - Batch Corrected",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = TRUE,
cluster_cols = FALSE,
cluster_rows = FALSE,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) <img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 AB_CD”heatmap - overlap of genes from AB and CD”-1.png” width=“672” />
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.eps"),
plot=heatmap_corrected,
width = 30, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.png"),
plot=heatmap_corrected,
width = 30, height = 14, units = "cm")
##### The same but for CD #####
deg_name <- "TpNK_tp2_vs_Tonly_tp1_CD"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)
precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)
load(precomputed_objects_deg_file)
load(precomputed_transf_objects_deg_file)
transf_object <- vsd_subset_filt
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
ensembl_gene_set <- ensembl2symbol_annot[which(ensembl2symbol_annot$mgi_symbol %in% geneset),]
ensembl_gene_set <- ensembl_gene_set[ match(geneset , ensembl_gene_set$mgi_symbol),]
indexes <- which(row.names(heatmap_counts_NObatch_deg_ord) %in% ensembl_gene_set$ensembl_id)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[indexes,]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[ match(ensembl_gene_set$ensembl_id , row.names(heatmap_counts_NObatch_deg_ord)),]
rownames(heatmap_counts_NObatch_deg_ord) <- ensembl_gene_set$mgi_symbol
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c(C = "#E9D8A6", D = "#D9BE6D")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG - Batch Corrected",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = TRUE,
cluster_cols = FALSE,
cluster_rows = FALSE,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) <img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03 AB_CD”heatmap - overlap of genes from AB and CD”-2.png” width=“672” />
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.eps"),
plot=heatmap_corrected,
width = 30, height = 14, units = "cm")
ggsave(filename = paste0(deg_dir, "publication/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.png"),
plot=heatmap_corrected,
width = 30, height = 14, units = "cm")#First, I need to load full Dataset:
experiment_name="nk_tum_immunoedit_complete"
output_dir <- "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/"
precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData")
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
# log2 and vsd transformed
precomputed_transf_objects_filename <- paste0(experiment_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
# check if object loaded if not load
if(!exists(precomputed_objects_file)){
load(precomputed_objects_file)
load(precomputed_transf_objects_file)
}
experiment_subset <- c("EXP9.4", "EXP9.5", "EXP9.6", "EXP9.7")
condition_tp_subset <- c("Tumor_only_timepoint_1",
"Tumor_plus_NK_timepoint_2",
"Tumor_plus_NK_timepoint_1",
"Tumor_plus_WT_NK_timepoint_2")
abs_filt_samples=3
deg_design <- as.formula("~ experiment + cell_line_label + condition_tp")
dds_subset <- dds_filt[ , dds_filt$condition_tp %in% condition_tp_subset]
dds_subset <- dds_subset[ , dds_subset$experiment %in% experiment_subset]
dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
table(dds_subset$condition_tp)##
## Tumor_only_timepoint_1 Tumor_plus_NK_timepoint_1 Tumor_plus_NK_timepoint_2
## 42 42 41
dds_subset$experiment <- droplevels(dds_subset$experiment)
table(dds_subset$experiment)##
## EXP9.4 EXP9.5 EXP9.6 EXP9.7
## 18 36 35 36
dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
table(dds_subset$cell_line_label)##
## A B C D
## 27 27 36 35
dds_subset_filt <- filterDatasets(dds_subset,
abs_filt = TRUE,
abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size## Original dds object samples: 125 genes: 15430
## Minimum number of samples with expression: 3
## Number of filtered genes: 433
## Filtered dds object has samples: 125 genes: 14997
cat("... dds\n")## ... dds
design(dds_subset_filt) <- deg_design
dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE)
transf_batch_NObatch_experiment <- vsd_subset_filt
transf_batch_NObatch_experiment_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_experiment), transf_batch_NObatch_experiment$experiment)
SummarizedExperiment::assay(transf_batch_NObatch_experiment) <- transf_batch_NObatch_experiment_count
deg_TpNK_tp2_vs_Tonly_tp1_results <- generateResults_upd(dds_object = dds_subset_filt,
coeff_name = "condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1",
cond_numerator = "Tumor_plus_NK_timepoint_2",
cond_denominator = "Tumor_only_timepoint_1",
cond_variable="condition_tp")
deg_TpNK_tp2_vs_Tonly_tp1_results## $results_signif
## ensembl_id mgi_symbol mgi_description entrezgene
## 1 ENSMUSG00000075602 Ly6a lymphocyte antigen 6 complex, locus A 110454
## 2 ENSMUSG00000060675 Plaat3 phospholipase A and acyltransferase 3 225845
## 3 ENSMUSG00000033880 Lgals3bp lectin, galactoside-binding, soluble, 3 binding protein 19039
## 4 ENSMUSG00000064215 Ifi27 interferon, alpha-inducible protein 27 52668
## 5 ENSMUSG00000104713 Gbp6 guanylate binding protein 6 100702
## 6 ENSMUSG00000033355 Rtp4 receptor transporter protein 4 67775
## 7 ENSMUSG00000026104 Stat1 signal transducer and activator of transcription 1 20846
## 8 ENSMUSG00000054072 Iigp1 interferon inducible GTPase 1 60440
## 9 ENSMUSG00000061232 H2-K1 histocompatibility 2, K1, K region 14972
## 10 ENSMUSG00000073555 Gm4951 predicted gene 4951 240327
## baseMean MeanExpr_Tumor_only_timepoint_1 MeanExpr_Tumor_plus_NK_timepoint_2 log2FoldChange
## 1 2456.40621 317.387691 3080.57543 3.2258807
## 2 242.32525 61.223411 324.05583 2.3592201
## 3 96.84177 11.627393 119.52526 3.3142561
## 4 364.12250 153.624891 426.71025 1.4770706
## 5 128.50281 6.047601 135.68641 4.1428494
## 6 39.88500 3.053751 46.64006 3.7056800
## 7 342.79939 79.385613 393.15858 1.9873128
## 8 1112.75679 42.307708 1175.32669 4.3614229
## 9 4893.62031 3035.265875 5424.20556 0.8243463
## 10 95.71447 10.450136 109.08363 2.9734537
## lfcSE FoldChange pvalue padj gene_biotype chromosome_name start_position
## 1 0.10740032 9.355928 9.066006e-206 1.331071e-201 protein_coding 15 74994877
## 2 0.09136706 5.130929 2.798105e-150 2.054089e-146 protein_coding 19 7557459
## 3 0.15494094 9.946963 1.049868e-103 5.138053e-100 protein_coding 11 118392751
## 4 0.06968169 2.783829 6.176427e-102 2.267058e-98 protein_coding 12 103434211
## 5 0.23046748 17.665338 3.423172e-77 1.005180e-73 protein_coding 5 105270702
## 6 0.21981886 13.047305 6.073340e-65 1.486146e-61 protein_coding 16 23520291
## 7 0.12601002 3.964978 5.014857e-61 1.051830e-57 protein_coding 1 52119440
## 8 0.29663413 20.555077 5.585136e-58 1.025012e-54 protein_coding 18 60376027
## 9 0.05356110 1.770732 6.395728e-55 1.043356e-51 protein_coding 17 33996017
## 10 0.20013176 7.854142 1.474542e-54 2.164923e-51 protein_coding 18 60212080
## end_position strand Tumor_only_timepoint_1-S_4_R0108 Tumor_only_timepoint_1-S_5_R0108
## 1 74998031 -1 226.3811564 341.817725
## 2 7588545 1 28.0879052 43.238856
## 3 118402092 -1 1.7388754 14.990335
## 4 103440239 1 98.6588702 122.576546
## 5 105293698 -1 1.7271571 14.889315
## 6 23614222 1 0.8639205 5.319718
## 7 52161865 1 74.6331721 79.944903
## 8 60392634 1 29.1684966 69.538617
## 9 34000347 -1 2565.3386318 2781.017989
## 10 60247820 1 7.8307557 15.001477
## Tumor_only_timepoint_1-S_6_R0108 Tumor_only_timepoint_1-S_7_R0108
## 1 288.630468 148.275585
## 2 39.019215 34.333732
## 3 6.625686 13.855399
## 4 100.911393 160.638370
## 5 3.290518 5.004374
## 6 0.000000 2.503178
## 7 61.756281 70.215071
## 8 59.599173 54.614600
## 9 2583.232844 2773.171511
## 10 16.576527 5.042072
## Tumor_only_timepoint_1-S_8_R0108 Tumor_only_timepoint_1-S_9_R0108
## 1 273.597302 161.953437
## 2 65.456486 53.728279
## 3 18.428030 6.908138
## 4 155.662949 139.898918
## 5 12.920360 0.000000
## 6 7.539861 6.864301
## 7 109.368560 91.134837
## 8 118.646154 30.616653
## 9 2779.194878 2806.570115
## 10 10.848075 11.522122
## Tumor_only_timepoint_1-S_34_R0108 Tumor_only_timepoint_1-S_35_R0108
## 1 297.822212 351.120560
## 2 64.761167 60.833866
## 3 6.168086 4.108498
## 4 75.784308 108.644603
## 5 6.126519 5.441081
## 6 1.225789 4.082427
## 7 71.821417 79.658971
## 8 30.861415 41.956265
## 9 3064.580619 3345.985933
## 10 9.876273 8.223104
## Tumor_only_timepoint_1-S_36_R0108 Tumor_only_timepoint_1-S_37_R0108
## 1 420.097841 290.666139
## 2 47.844025 43.135620
## 3 8.124189 2.359208
## 4 78.863488 126.747439
## 5 16.138880 0.000000
## 6 6.727196 0.000000
## 7 106.799762 101.535313
## 8 41.444818 27.141134
## 9 2950.083088 3153.145857
## 10 18.970531 2.360961
## Tumor_only_timepoint_1-S_38_R0108 Tumor_only_timepoint_1-S_39_R0108
## 1 402.564866 406.966531
## 2 57.841038 84.857099
## 3 9.171110 19.052192
## 4 178.052411 166.780680
## 5 7.085016 6.678988
## 6 2.025092 4.454422
## 7 97.821519 96.012927
## 8 82.837569 97.044997
## 9 3091.523270 3465.008042
## 10 12.237236 12.337052
## Tumor_only_timepoint_1-S_40_R0108 Tumor_only_timepoint_1-S_41_R0108
## 1 316.059288 457.71021
## 2 55.206801 64.08933
## 3 16.929106 17.31344
## 4 139.644242 135.22537
## 5 6.005364 9.55376
## 6 4.806194 6.69028
## 7 52.263737 88.42372
## 8 28.980145 101.49043
## 9 2911.020108 3081.24480
## 10 14.521448 23.10175
## Tumor_only_timepoint_1-S_42_R0108 Tumor_only_timepoint_1-S_43_R0108
## 1 480.48704 199.771467
## 2 54.28901 81.112141
## 3 32.73265 20.566199
## 4 185.68004 215.221101
## 5 19.73947 9.676233
## 6 4.64642 3.226688
## 7 103.96059 87.287023
## 8 104.66037 34.836180
## 9 3006.12358 3010.527465
## 10 14.03871 11.915597
## Tumor_only_timepoint_1-S_44_R0108 Tumor_only_timepoint_1-S_45_R0108
## 1 229.257848 203.016839
## 2 101.769822 60.487194
## 3 19.150569 18.594181
## 4 203.126981 201.245183
## 5 0.000000 0.000000
## 6 1.189315 8.211639
## 7 95.951230 66.074768
## 8 44.097532 36.432202
## 9 2601.755822 3026.664063
## 10 5.989001 8.270223
## Tumor_only_timepoint_1-S_70_R0108 Tumor_only_timepoint_1-S_71_R0108
## 1 440.139715 323.892923
## 2 48.336005 57.581463
## 3 11.796622 9.871678
## 4 110.169108 89.218043
## 5 15.232262 8.715691
## 6 1.172176 1.089893
## 7 100.600029 66.246309
## 8 158.691062 73.283414
## 9 2943.392418 3247.204251
## 10 7.083234 7.683679
## Tumor_only_timepoint_1-S_72_R0108 Tumor_only_timepoint_1-S_73_R0108
## 1 265.136089 459.069572
## 2 28.606686 43.864671
## 3 6.839694 19.749736
## 4 111.856885 106.104620
## 5 0.000000 22.559139
## 6 3.883595 4.906103
## 7 94.042735 90.517228
## 8 37.192036 104.972199
## 9 2796.872346 2890.697668
## 10 4.889127 13.835091
## Tumor_only_timepoint_1-S_74_R0108 Tumor_only_timepoint_1-S_75_R0108
## 1 354.799205 318.324971
## 2 57.523615 59.434830
## 3 5.907787 4.709775
## 4 85.429245 83.949419
## 5 3.520785 7.017053
## 6 1.174060 2.339944
## 7 84.495940 81.791847
## 8 14.314338 39.161395
## 9 2474.143255 2743.189753
## 10 15.371664 10.604869
## Tumor_only_timepoint_1-S_76_R0108 Tumor_only_timepoint_1-S_77_R0108
## 1 174.547255 289.569693
## 2 70.389334 88.539703
## 3 6.577640 13.916724
## 4 100.399813 165.883122
## 5 0.000000 15.797644
## 6 5.228720 5.926462
## 7 53.714433 82.068469
## 8 23.102968 86.155960
## 9 2747.646997 3257.905502
## 10 3.949517 11.937487
## Tumor_only_timepoint_1-S_78_R0108 Tumor_only_timepoint_1-S_79_R0108
## 1 226.481125 217.422651
## 2 52.089351 71.157408
## 3 6.879499 12.983855
## 4 114.182699 178.061376
## 5 11.388562 8.290515
## 6 1.139307 9.215331
## 7 74.665224 70.239239
## 8 43.979256 44.271868
## 9 2742.242177 3008.792871
## 10 11.474353 9.281076
## Tumor_only_timepoint_1-S_80_R0108 Tumor_only_timepoint_1-S_81_R0108
## 1 237.785720 199.559990
## 2 65.070061 73.968560
## 3 9.310747 13.759217
## 4 193.700233 226.385809
## 5 6.473601 9.461419
## 6 3.700666 7.361795
## 7 81.003262 83.840786
## 8 48.784798 33.382367
## 9 2553.959914 2921.514493
## 10 11.181201 10.591880
## Tumor_only_timepoint_1-S_1_R0114_L5311 Tumor_only_timepoint_1-S_2_R0114_L5311
## 1 505.9655486 534.8699989
## 2 60.9952416 83.8498816
## 3 6.0972113 9.4535404
## 4 138.0414336 148.5011370
## 5 1.1011131 1.1177423
## 6 0.5507746 0.6709679
## 7 88.9047202 39.0957467
## 8 1.5496474 4.3073388
## 9 4343.4360644 4697.6721201
## 10 6.1017433 1.3515096
## Tumor_only_timepoint_1-S_3_R0114_L5311 Tumor_only_timepoint_1-S_4_R0114_L5311
## 1 398.278202 349.7592985
## 2 81.552390 20.4637753
## 3 0.000000 12.7626253
## 4 125.031832 117.7537527
## 5 4.732983 2.2370502
## 6 0.000000 0.0000000
## 7 74.366562 76.4479256
## 8 3.227663 0.6996231
## 9 3541.985255 2258.0354224
## 10 3.577009 6.7617062
## Tumor_only_timepoint_1-S_5_R0114_L5311 Tumor_only_timepoint_1-S_6_R0114_L5311
## 1 400.4039886 378.5871716
## 2 36.2312671 32.1178943
## 3 3.6620692 0.7574725
## 4 150.1706045 186.3597761
## 5 0.7274781 0.0000000
## 6 2.1832985 0.0000000
## 7 83.9293788 81.2914481
## 8 5.2486655 4.9412632
## 9 3216.1610799 2904.2991488
## 10 16.8580396 5.3062488
## Tumor_only_timepoint_1-S_7_R0114_L5311 Tumor_only_timepoint_1-S_8_R0114_L5311
## 1 405.189817 345.041272
## 2 136.987058 66.942635
## 3 27.320185 11.367242
## 4 270.690680 216.884210
## 5 2.713607 1.254515
## 6 0.000000 0.000000
## 7 56.265323 64.204041
## 8 0.000000 1.177025
## 9 4556.187967 2642.948039
## 10 17.315645 6.319829
## Tumor_only_timepoint_1-S_9_R0114_L5311 Tumor_only_timepoint_1-S_10_R0114_L5311
## 1 321.561946 303.5678649
## 2 64.563574 57.1540398
## 3 4.263492 10.2217886
## 4 189.936624 226.7748238
## 5 4.940553 0.0000000
## 6 0.000000 1.5626037
## 7 65.269987 83.8668240
## 8 5.284401 0.7327513
## 9 3465.607292 3154.0607381
## 10 4.977771 5.5081311
## Tumor_only_timepoint_1-S_11_R0114_L5311 Tumor_only_timepoint_1-S_12_R0114_L5311
## 1 177.4169334 206.715578
## 2 85.4001696 88.472046
## 3 26.9457231 16.380283
## 4 240.2594438 283.137834
## 5 0.0000000 2.440484
## 6 0.8924911 4.882902
## 7 62.2162061 60.448264
## 8 3.3481249 5.148823
## 9 2508.3364761 2868.686873
## 10 25.1680349 13.113966
## Tumor_plus_NK_timepoint_2-S_16_R0108 Tumor_plus_NK_timepoint_2-S_17_R0108
## 1 1747.63424 2043.64240
## 2 181.27471 176.89550
## 3 84.16812 115.67506
## 4 379.78296 376.28100
## 5 28.98165 32.40643
## 6 42.37457 63.85612
## 7 122.06926 141.00851
## 8 106.58488 116.17965
## 9 6905.47407 6449.94761
## 10 24.70767 37.59760
## Tumor_plus_NK_timepoint_2-S_18_R0108 Tumor_plus_NK_timepoint_2-S_19_R0108
## 1 1961.66038 2169.31465
## 2 208.42627 252.79548
## 3 131.79911 126.70198
## 4 393.85502 640.51907
## 5 38.00639 67.76438
## 6 63.36908 58.10676
## 7 166.78004 210.28623
## 8 203.35784 306.30693
## 9 6295.43975 5827.26955
## 10 44.67482 32.51184
## Tumor_plus_NK_timepoint_2-S_20_R0108 Tumor_plus_NK_timepoint_2-S_21_R0108
## 1 1663.59370 1719.85123
## 2 297.94389 307.26016
## 3 148.16929 124.86695
## 4 566.98286 551.29567
## 5 40.31177 56.01150
## 6 59.30549 44.02647
## 7 197.19845 203.08716
## 8 240.53894 271.33505
## 9 5807.39212 5961.36343
## 10 27.47515 40.30960
## Tumor_plus_NK_timepoint_2-S_58_R0108 Tumor_plus_NK_timepoint_2-S_59_R0108
## 1 6488.80633 6112.69599
## 2 503.96062 519.00145
## 3 197.54182 172.40962
## 4 600.59927 521.49861
## 5 549.18734 461.83058
## 6 82.96721 61.25829
## 7 1193.43729 1284.76246
## 8 4667.39493 4185.81629
## 9 6607.43373 6610.74395
## 10 335.25549 308.47662
## Tumor_plus_NK_timepoint_2-S_60_R0108 Tumor_plus_NK_timepoint_2-S_61_R0108
## 1 6934.22563 2570.28564
## 2 517.09769 326.79839
## 3 141.77686 120.59119
## 4 608.79137 319.08203
## 5 528.39189 77.95110
## 6 57.34824 23.77499
## 7 1221.99259 292.64331
## 8 4105.83752 573.06006
## 9 6155.02388 5435.19475
## 10 297.57603 69.91826
## Tumor_plus_NK_timepoint_2-S_62_R0108 Tumor_plus_NK_timepoint_2-S_63_R0108
## 1 3412.69962 2379.91330
## 2 331.81238 309.76795
## 3 128.80487 114.34307
## 4 353.07839 340.29375
## 5 176.34539 130.09215
## 6 17.29561 17.55907
## 7 384.82494 274.10965
## 8 1045.62747 621.84628
## 9 5600.59252 5799.93438
## 10 130.06187 100.90474
## Tumor_plus_NK_timepoint_2-S_64_R0108 Tumor_plus_NK_timepoint_2-S_65_R0108
## 1 4339.3412 4841.7061
## 2 301.9529 342.1000
## 3 213.3512 193.8010
## 4 443.5946 409.1050
## 5 146.5486 243.3223
## 6 147.5576 123.3321
## 7 606.5484 629.9075
## 8 2337.2736 2906.8185
## 9 6228.9387 5952.1540
## 10 212.5692 223.3637
## Tumor_plus_NK_timepoint_2-S_66_R0108 Tumor_plus_NK_timepoint_2-S_67_R0108
## 1 4053.6859 5411.1298
## 2 318.4661 499.6300
## 3 194.3493 286.8203
## 4 331.4864 617.1513
## 5 179.4221 349.3431
## 6 104.8067 156.6732
## 7 612.9029 971.6622
## 8 2402.8629 4499.3759
## 9 5620.9965 6559.9749
## 10 166.1504 379.5810
## Tumor_plus_NK_timepoint_2-S_68_R0108 Tumor_plus_NK_timepoint_2-S_69_R0108
## 1 5068.2301 5785.6571
## 2 571.7537 618.9396
## 3 212.2716 185.3203
## 4 647.7775 565.9427
## 5 290.4707 298.7725
## 6 121.8675 123.6037
## 7 715.3963 824.1747
## 8 3518.5242 3694.0642
## 9 5686.0770 5153.7038
## 10 276.1582 307.4031
## Tumor_plus_NK_timepoint_2-S_94_R0108 Tumor_plus_NK_timepoint_2-S_95_R0108
## 1 2853.68834 3907.22481
## 2 315.31412 408.27858
## 3 98.55720 102.00342
## 4 376.87513 444.90837
## 5 136.49872 189.47599
## 6 12.41389 48.88943
## 7 369.86090 525.84934
## 8 582.78057 1490.16889
## 9 6266.28309 5556.06367
## 10 75.01471 102.07924
## Tumor_plus_NK_timepoint_2-S_96_R0108 Tumor_plus_NK_timepoint_2-S_97_R0108
## 1 3054.755772 1612.10441
## 2 343.314502 268.09793
## 3 48.835770 79.95366
## 4 407.082559 210.56451
## 5 127.347277 48.09632
## 6 7.643862 0.00000
## 7 368.781959 129.49845
## 8 556.921078 155.88130
## 9 5219.548551 4777.95969
## 10 80.833154 24.79279
## Tumor_plus_NK_timepoint_2-S_98_R0108 Tumor_plus_NK_timepoint_2-S_99_R0108
## 1 3161.130849 3365.620217
## 2 335.293619 353.560196
## 3 94.332083 91.340829
## 4 258.349255 233.479909
## 5 151.191882 143.746550
## 6 9.586379 4.714868
## 7 397.607171 488.790691
## 8 964.039135 1354.323735
## 9 4752.982702 5356.586333
## 10 101.911465 115.151248
## Tumor_plus_NK_timepoint_2-S_100_R0108 Tumor_plus_NK_timepoint_2-S_101_R0108
## 1 2248.20859 3693.7522
## 2 285.03959 343.1971
## 3 126.88052 119.1217
## 4 413.17662 357.6320
## 5 63.01274 143.9118
## 6 48.40394 50.0309
## 7 229.62176 406.1431
## 8 317.53985 1435.0022
## 9 5123.10203 5736.0080
## 10 41.94704 99.5467
## Tumor_plus_NK_timepoint_2-S_102_R0108 Tumor_plus_NK_timepoint_2-S_103_R0108
## 1 2497.46950 3970.89731
## 2 257.96566 393.79609
## 3 99.21820 230.91655
## 4 419.01959 561.65120
## 5 58.51867 146.73088
## 6 31.59401 83.58817
## 7 224.34658 568.56385
## 8 377.55128 1663.47315
## 9 4886.22860 5833.79498
## 10 59.89536 144.75629
## Tumor_plus_NK_timepoint_2-S_104_R0108 Tumor_plus_NK_timepoint_2-S_25_R0114_L5311
## 1 4100.50705 2451.84810
## 2 442.56640 275.66064
## 3 141.80837 24.51542
## 4 678.64953 438.98600
## 5 212.31066 53.73839
## 6 67.29958 5.03997
## 7 520.66380 152.25959
## 8 1819.02296 112.74574
## 9 6454.32469 6012.40889
## 10 140.85471 48.22130
## Tumor_plus_NK_timepoint_2-S_26_R0114_L5311 Tumor_plus_NK_timepoint_2-S_27_R0114_L5311
## 1 1413.182567 2255.761783
## 2 196.748054 230.346953
## 3 13.427171 22.182839
## 4 312.802010 398.735564
## 5 42.510684 23.607159
## 6 7.504856 8.796351
## 7 169.835020 135.128724
## 8 130.822084 87.631803
## 9 3255.278358 3864.183482
## 10 42.830920 12.685330
## Tumor_plus_NK_timepoint_2-S_28_R0114_L5311 Tumor_plus_NK_timepoint_2-S_29_R0114_L5311
## 1 1512.3967890 3662.44856
## 2 139.5622890 286.44692
## 3 57.1499529 142.65577
## 4 240.5679225 427.38939
## 5 31.4507777 128.38609
## 6 0.7673959 10.18098
## 7 119.9576377 369.55434
## 8 25.1898319 906.82660
## 9 3509.5858480 4351.23008
## 10 22.4132504 124.62080
## Tumor_plus_NK_timepoint_2-S_30_R0114_L5311 Tumor_plus_NK_timepoint_2-S_31_R0114_L5311
## 1 2026.09662 1379.749209
## 2 151.23444 166.924585
## 3 65.04070 64.058822
## 4 311.23758 310.013115
## 5 44.45756 29.130493
## 6 0.00000 9.202746
## 7 216.07682 122.625541
## 8 138.24193 50.223875
## 9 3933.19026 4127.948022
## 10 42.69281 32.439402
## Tumor_plus_NK_timepoint_2-S_32_R0114_L5311 Tumor_plus_NK_timepoint_2-S_33_R0114_L5311
## 1 1007.45324 1090.70706
## 2 121.56246 137.02984
## 3 22.86767 34.30605
## 4 299.26832 256.26347
## 5 12.45582 17.03743
## 6 11.72777 10.37472
## 7 85.44529 88.40889
## 8 22.68844 18.53216
## 9 3051.54335 3363.10199
## 10 34.69610 26.12183
## Tumor_plus_NK_timepoint_2-S_34_R0114_L5311 Tumor_plus_NK_timepoint_2-S_35_R0114_L5311
## 1 2545.25905 1889.68993
## 2 475.60652 375.51234
## 3 138.20748 91.00698
## 4 599.21008 413.73002
## 5 20.33720 16.11670
## 6 79.92778 25.93714
## 7 170.30992 117.15614
## 8 77.83998 52.85539
## 9 6938.32427 5117.73916
## 10 30.00380 25.41617
## Tumor_plus_NK_timepoint_2-S_36_R0114_L5311
## 1 1899.567509
## 2 397.353609
## 3 99.386914
## 4 458.410596
## 5 27.913123
## 6 9.535085
## 7 90.224031
## 8 45.286888
## 9 6247.357109
## 10 28.809331
## [ reached 'max' / getOption("max.print") -- omitted 480 rows ]
##
## $de_details
## test
## 1 condition_tp_Tumor_plus_NK_timepoint_2_vs_Tumor_only_timepoint_1
## design signif_genes signif_genes_UP signif_genes_DOWN
## 1 ~experiment + cell_line_label + condition_tp 490 330 160
## cutoffs
## 1 (!is.na(padj) & (padj < 0.05)) & abs(log2FoldChange) > 0.58
##
## $results_all
## ensembl_id mgi_symbol mgi_description entrezgene
## 1 ENSMUSG00000075602 Ly6a lymphocyte antigen 6 complex, locus A 110454
## 2 ENSMUSG00000060675 Plaat3 phospholipase A and acyltransferase 3 225845
## 3 ENSMUSG00000033880 Lgals3bp lectin, galactoside-binding, soluble, 3 binding protein 19039
## 4 ENSMUSG00000064215 Ifi27 interferon, alpha-inducible protein 27 52668
## 5 ENSMUSG00000104713 Gbp6 guanylate binding protein 6 100702
## 6 ENSMUSG00000033355 Rtp4 receptor transporter protein 4 67775
## 7 ENSMUSG00000026104 Stat1 signal transducer and activator of transcription 1 20846
## 8 ENSMUSG00000054072 Iigp1 interferon inducible GTPase 1 60440
## 9 ENSMUSG00000061232 H2-K1 histocompatibility 2, K1, K region 14972
## 10 ENSMUSG00000073555 Gm4951 predicted gene 4951 240327
## baseMean MeanExpr_Tumor_only_timepoint_1 MeanExpr_Tumor_plus_NK_timepoint_2 log2FoldChange
## 1 2456.40621 317.387691 3080.57543 3.2258807
## 2 242.32525 61.223411 324.05583 2.3592201
## 3 96.84177 11.627393 119.52526 3.3142561
## 4 364.12250 153.624891 426.71025 1.4770706
## 5 128.50281 6.047601 135.68641 4.1428494
## 6 39.88500 3.053751 46.64006 3.7056800
## 7 342.79939 79.385613 393.15858 1.9873128
## 8 1112.75679 42.307708 1175.32669 4.3614229
## 9 4893.62031 3035.265875 5424.20556 0.8243463
## 10 95.71447 10.450136 109.08363 2.9734537
## lfcSE FoldChange pvalue padj gene_biotype chromosome_name start_position
## 1 0.10740032 9.355928 9.066006e-206 1.331071e-201 protein_coding 15 74994877
## 2 0.09136706 5.130929 2.798105e-150 2.054089e-146 protein_coding 19 7557459
## 3 0.15494094 9.946963 1.049868e-103 5.138053e-100 protein_coding 11 118392751
## 4 0.06968169 2.783829 6.176427e-102 2.267058e-98 protein_coding 12 103434211
## 5 0.23046748 17.665338 3.423172e-77 1.005180e-73 protein_coding 5 105270702
## 6 0.21981886 13.047305 6.073340e-65 1.486146e-61 protein_coding 16 23520291
## 7 0.12601002 3.964978 5.014857e-61 1.051830e-57 protein_coding 1 52119440
## 8 0.29663413 20.555077 5.585136e-58 1.025012e-54 protein_coding 18 60376027
## 9 0.05356110 1.770732 6.395728e-55 1.043356e-51 protein_coding 17 33996017
## 10 0.20013176 7.854142 1.474542e-54 2.164923e-51 protein_coding 18 60212080
## end_position strand Tumor_only_timepoint_1-S_4_R0108 Tumor_only_timepoint_1-S_5_R0108
## 1 74998031 -1 226.3811564 341.817725
## 2 7588545 1 28.0879052 43.238856
## 3 118402092 -1 1.7388754 14.990335
## 4 103440239 1 98.6588702 122.576546
## 5 105293698 -1 1.7271571 14.889315
## 6 23614222 1 0.8639205 5.319718
## 7 52161865 1 74.6331721 79.944903
## 8 60392634 1 29.1684966 69.538617
## 9 34000347 -1 2565.3386318 2781.017989
## 10 60247820 1 7.8307557 15.001477
## Tumor_only_timepoint_1-S_6_R0108 Tumor_only_timepoint_1-S_7_R0108
## 1 288.630468 148.275585
## 2 39.019215 34.333732
## 3 6.625686 13.855399
## 4 100.911393 160.638370
## 5 3.290518 5.004374
## 6 0.000000 2.503178
## 7 61.756281 70.215071
## 8 59.599173 54.614600
## 9 2583.232844 2773.171511
## 10 16.576527 5.042072
## Tumor_only_timepoint_1-S_8_R0108 Tumor_only_timepoint_1-S_9_R0108
## 1 273.597302 161.953437
## 2 65.456486 53.728279
## 3 18.428030 6.908138
## 4 155.662949 139.898918
## 5 12.920360 0.000000
## 6 7.539861 6.864301
## 7 109.368560 91.134837
## 8 118.646154 30.616653
## 9 2779.194878 2806.570115
## 10 10.848075 11.522122
## Tumor_only_timepoint_1-S_34_R0108 Tumor_only_timepoint_1-S_35_R0108
## 1 297.822212 351.120560
## 2 64.761167 60.833866
## 3 6.168086 4.108498
## 4 75.784308 108.644603
## 5 6.126519 5.441081
## 6 1.225789 4.082427
## 7 71.821417 79.658971
## 8 30.861415 41.956265
## 9 3064.580619 3345.985933
## 10 9.876273 8.223104
## Tumor_only_timepoint_1-S_36_R0108 Tumor_only_timepoint_1-S_37_R0108
## 1 420.097841 290.666139
## 2 47.844025 43.135620
## 3 8.124189 2.359208
## 4 78.863488 126.747439
## 5 16.138880 0.000000
## 6 6.727196 0.000000
## 7 106.799762 101.535313
## 8 41.444818 27.141134
## 9 2950.083088 3153.145857
## 10 18.970531 2.360961
## Tumor_only_timepoint_1-S_38_R0108 Tumor_only_timepoint_1-S_39_R0108
## 1 402.564866 406.966531
## 2 57.841038 84.857099
## 3 9.171110 19.052192
## 4 178.052411 166.780680
## 5 7.085016 6.678988
## 6 2.025092 4.454422
## 7 97.821519 96.012927
## 8 82.837569 97.044997
## 9 3091.523270 3465.008042
## 10 12.237236 12.337052
## Tumor_only_timepoint_1-S_40_R0108 Tumor_only_timepoint_1-S_41_R0108
## 1 316.059288 457.71021
## 2 55.206801 64.08933
## 3 16.929106 17.31344
## 4 139.644242 135.22537
## 5 6.005364 9.55376
## 6 4.806194 6.69028
## 7 52.263737 88.42372
## 8 28.980145 101.49043
## 9 2911.020108 3081.24480
## 10 14.521448 23.10175
## Tumor_only_timepoint_1-S_42_R0108 Tumor_only_timepoint_1-S_43_R0108
## 1 480.48704 199.771467
## 2 54.28901 81.112141
## 3 32.73265 20.566199
## 4 185.68004 215.221101
## 5 19.73947 9.676233
## 6 4.64642 3.226688
## 7 103.96059 87.287023
## 8 104.66037 34.836180
## 9 3006.12358 3010.527465
## 10 14.03871 11.915597
## Tumor_only_timepoint_1-S_44_R0108 Tumor_only_timepoint_1-S_45_R0108
## 1 229.257848 203.016839
## 2 101.769822 60.487194
## 3 19.150569 18.594181
## 4 203.126981 201.245183
## 5 0.000000 0.000000
## 6 1.189315 8.211639
## 7 95.951230 66.074768
## 8 44.097532 36.432202
## 9 2601.755822 3026.664063
## 10 5.989001 8.270223
## Tumor_only_timepoint_1-S_70_R0108 Tumor_only_timepoint_1-S_71_R0108
## 1 440.139715 323.892923
## 2 48.336005 57.581463
## 3 11.796622 9.871678
## 4 110.169108 89.218043
## 5 15.232262 8.715691
## 6 1.172176 1.089893
## 7 100.600029 66.246309
## 8 158.691062 73.283414
## 9 2943.392418 3247.204251
## 10 7.083234 7.683679
## Tumor_only_timepoint_1-S_72_R0108 Tumor_only_timepoint_1-S_73_R0108
## 1 265.136089 459.069572
## 2 28.606686 43.864671
## 3 6.839694 19.749736
## 4 111.856885 106.104620
## 5 0.000000 22.559139
## 6 3.883595 4.906103
## 7 94.042735 90.517228
## 8 37.192036 104.972199
## 9 2796.872346 2890.697668
## 10 4.889127 13.835091
## Tumor_only_timepoint_1-S_74_R0108 Tumor_only_timepoint_1-S_75_R0108
## 1 354.799205 318.324971
## 2 57.523615 59.434830
## 3 5.907787 4.709775
## 4 85.429245 83.949419
## 5 3.520785 7.017053
## 6 1.174060 2.339944
## 7 84.495940 81.791847
## 8 14.314338 39.161395
## 9 2474.143255 2743.189753
## 10 15.371664 10.604869
## Tumor_only_timepoint_1-S_76_R0108 Tumor_only_timepoint_1-S_77_R0108
## 1 174.547255 289.569693
## 2 70.389334 88.539703
## 3 6.577640 13.916724
## 4 100.399813 165.883122
## 5 0.000000 15.797644
## 6 5.228720 5.926462
## 7 53.714433 82.068469
## 8 23.102968 86.155960
## 9 2747.646997 3257.905502
## 10 3.949517 11.937487
## Tumor_only_timepoint_1-S_78_R0108 Tumor_only_timepoint_1-S_79_R0108
## 1 226.481125 217.422651
## 2 52.089351 71.157408
## 3 6.879499 12.983855
## 4 114.182699 178.061376
## 5 11.388562 8.290515
## 6 1.139307 9.215331
## 7 74.665224 70.239239
## 8 43.979256 44.271868
## 9 2742.242177 3008.792871
## 10 11.474353 9.281076
## Tumor_only_timepoint_1-S_80_R0108 Tumor_only_timepoint_1-S_81_R0108
## 1 237.785720 199.559990
## 2 65.070061 73.968560
## 3 9.310747 13.759217
## 4 193.700233 226.385809
## 5 6.473601 9.461419
## 6 3.700666 7.361795
## 7 81.003262 83.840786
## 8 48.784798 33.382367
## 9 2553.959914 2921.514493
## 10 11.181201 10.591880
## Tumor_only_timepoint_1-S_1_R0114_L5311 Tumor_only_timepoint_1-S_2_R0114_L5311
## 1 505.9655486 534.8699989
## 2 60.9952416 83.8498816
## 3 6.0972113 9.4535404
## 4 138.0414336 148.5011370
## 5 1.1011131 1.1177423
## 6 0.5507746 0.6709679
## 7 88.9047202 39.0957467
## 8 1.5496474 4.3073388
## 9 4343.4360644 4697.6721201
## 10 6.1017433 1.3515096
## Tumor_only_timepoint_1-S_3_R0114_L5311 Tumor_only_timepoint_1-S_4_R0114_L5311
## 1 398.278202 349.7592985
## 2 81.552390 20.4637753
## 3 0.000000 12.7626253
## 4 125.031832 117.7537527
## 5 4.732983 2.2370502
## 6 0.000000 0.0000000
## 7 74.366562 76.4479256
## 8 3.227663 0.6996231
## 9 3541.985255 2258.0354224
## 10 3.577009 6.7617062
## Tumor_only_timepoint_1-S_5_R0114_L5311 Tumor_only_timepoint_1-S_6_R0114_L5311
## 1 400.4039886 378.5871716
## 2 36.2312671 32.1178943
## 3 3.6620692 0.7574725
## 4 150.1706045 186.3597761
## 5 0.7274781 0.0000000
## 6 2.1832985 0.0000000
## 7 83.9293788 81.2914481
## 8 5.2486655 4.9412632
## 9 3216.1610799 2904.2991488
## 10 16.8580396 5.3062488
## Tumor_only_timepoint_1-S_7_R0114_L5311 Tumor_only_timepoint_1-S_8_R0114_L5311
## 1 405.189817 345.041272
## 2 136.987058 66.942635
## 3 27.320185 11.367242
## 4 270.690680 216.884210
## 5 2.713607 1.254515
## 6 0.000000 0.000000
## 7 56.265323 64.204041
## 8 0.000000 1.177025
## 9 4556.187967 2642.948039
## 10 17.315645 6.319829
## Tumor_only_timepoint_1-S_9_R0114_L5311 Tumor_only_timepoint_1-S_10_R0114_L5311
## 1 321.561946 303.5678649
## 2 64.563574 57.1540398
## 3 4.263492 10.2217886
## 4 189.936624 226.7748238
## 5 4.940553 0.0000000
## 6 0.000000 1.5626037
## 7 65.269987 83.8668240
## 8 5.284401 0.7327513
## 9 3465.607292 3154.0607381
## 10 4.977771 5.5081311
## Tumor_only_timepoint_1-S_11_R0114_L5311 Tumor_only_timepoint_1-S_12_R0114_L5311
## 1 177.4169334 206.715578
## 2 85.4001696 88.472046
## 3 26.9457231 16.380283
## 4 240.2594438 283.137834
## 5 0.0000000 2.440484
## 6 0.8924911 4.882902
## 7 62.2162061 60.448264
## 8 3.3481249 5.148823
## 9 2508.3364761 2868.686873
## 10 25.1680349 13.113966
## Tumor_plus_NK_timepoint_2-S_16_R0108 Tumor_plus_NK_timepoint_2-S_17_R0108
## 1 1747.63424 2043.64240
## 2 181.27471 176.89550
## 3 84.16812 115.67506
## 4 379.78296 376.28100
## 5 28.98165 32.40643
## 6 42.37457 63.85612
## 7 122.06926 141.00851
## 8 106.58488 116.17965
## 9 6905.47407 6449.94761
## 10 24.70767 37.59760
## Tumor_plus_NK_timepoint_2-S_18_R0108 Tumor_plus_NK_timepoint_2-S_19_R0108
## 1 1961.66038 2169.31465
## 2 208.42627 252.79548
## 3 131.79911 126.70198
## 4 393.85502 640.51907
## 5 38.00639 67.76438
## 6 63.36908 58.10676
## 7 166.78004 210.28623
## 8 203.35784 306.30693
## 9 6295.43975 5827.26955
## 10 44.67482 32.51184
## Tumor_plus_NK_timepoint_2-S_20_R0108 Tumor_plus_NK_timepoint_2-S_21_R0108
## 1 1663.59370 1719.85123
## 2 297.94389 307.26016
## 3 148.16929 124.86695
## 4 566.98286 551.29567
## 5 40.31177 56.01150
## 6 59.30549 44.02647
## 7 197.19845 203.08716
## 8 240.53894 271.33505
## 9 5807.39212 5961.36343
## 10 27.47515 40.30960
## Tumor_plus_NK_timepoint_2-S_58_R0108 Tumor_plus_NK_timepoint_2-S_59_R0108
## 1 6488.80633 6112.69599
## 2 503.96062 519.00145
## 3 197.54182 172.40962
## 4 600.59927 521.49861
## 5 549.18734 461.83058
## 6 82.96721 61.25829
## 7 1193.43729 1284.76246
## 8 4667.39493 4185.81629
## 9 6607.43373 6610.74395
## 10 335.25549 308.47662
## Tumor_plus_NK_timepoint_2-S_60_R0108 Tumor_plus_NK_timepoint_2-S_61_R0108
## 1 6934.22563 2570.28564
## 2 517.09769 326.79839
## 3 141.77686 120.59119
## 4 608.79137 319.08203
## 5 528.39189 77.95110
## 6 57.34824 23.77499
## 7 1221.99259 292.64331
## 8 4105.83752 573.06006
## 9 6155.02388 5435.19475
## 10 297.57603 69.91826
## Tumor_plus_NK_timepoint_2-S_62_R0108 Tumor_plus_NK_timepoint_2-S_63_R0108
## 1 3412.69962 2379.91330
## 2 331.81238 309.76795
## 3 128.80487 114.34307
## 4 353.07839 340.29375
## 5 176.34539 130.09215
## 6 17.29561 17.55907
## 7 384.82494 274.10965
## 8 1045.62747 621.84628
## 9 5600.59252 5799.93438
## 10 130.06187 100.90474
## Tumor_plus_NK_timepoint_2-S_64_R0108 Tumor_plus_NK_timepoint_2-S_65_R0108
## 1 4339.3412 4841.7061
## 2 301.9529 342.1000
## 3 213.3512 193.8010
## 4 443.5946 409.1050
## 5 146.5486 243.3223
## 6 147.5576 123.3321
## 7 606.5484 629.9075
## 8 2337.2736 2906.8185
## 9 6228.9387 5952.1540
## 10 212.5692 223.3637
## Tumor_plus_NK_timepoint_2-S_66_R0108 Tumor_plus_NK_timepoint_2-S_67_R0108
## 1 4053.6859 5411.1298
## 2 318.4661 499.6300
## 3 194.3493 286.8203
## 4 331.4864 617.1513
## 5 179.4221 349.3431
## 6 104.8067 156.6732
## 7 612.9029 971.6622
## 8 2402.8629 4499.3759
## 9 5620.9965 6559.9749
## 10 166.1504 379.5810
## Tumor_plus_NK_timepoint_2-S_68_R0108 Tumor_plus_NK_timepoint_2-S_69_R0108
## 1 5068.2301 5785.6571
## 2 571.7537 618.9396
## 3 212.2716 185.3203
## 4 647.7775 565.9427
## 5 290.4707 298.7725
## 6 121.8675 123.6037
## 7 715.3963 824.1747
## 8 3518.5242 3694.0642
## 9 5686.0770 5153.7038
## 10 276.1582 307.4031
## Tumor_plus_NK_timepoint_2-S_94_R0108 Tumor_plus_NK_timepoint_2-S_95_R0108
## 1 2853.68834 3907.22481
## 2 315.31412 408.27858
## 3 98.55720 102.00342
## 4 376.87513 444.90837
## 5 136.49872 189.47599
## 6 12.41389 48.88943
## 7 369.86090 525.84934
## 8 582.78057 1490.16889
## 9 6266.28309 5556.06367
## 10 75.01471 102.07924
## Tumor_plus_NK_timepoint_2-S_96_R0108 Tumor_plus_NK_timepoint_2-S_97_R0108
## 1 3054.755772 1612.10441
## 2 343.314502 268.09793
## 3 48.835770 79.95366
## 4 407.082559 210.56451
## 5 127.347277 48.09632
## 6 7.643862 0.00000
## 7 368.781959 129.49845
## 8 556.921078 155.88130
## 9 5219.548551 4777.95969
## 10 80.833154 24.79279
## Tumor_plus_NK_timepoint_2-S_98_R0108 Tumor_plus_NK_timepoint_2-S_99_R0108
## 1 3161.130849 3365.620217
## 2 335.293619 353.560196
## 3 94.332083 91.340829
## 4 258.349255 233.479909
## 5 151.191882 143.746550
## 6 9.586379 4.714868
## 7 397.607171 488.790691
## 8 964.039135 1354.323735
## 9 4752.982702 5356.586333
## 10 101.911465 115.151248
## Tumor_plus_NK_timepoint_2-S_100_R0108 Tumor_plus_NK_timepoint_2-S_101_R0108
## 1 2248.20859 3693.7522
## 2 285.03959 343.1971
## 3 126.88052 119.1217
## 4 413.17662 357.6320
## 5 63.01274 143.9118
## 6 48.40394 50.0309
## 7 229.62176 406.1431
## 8 317.53985 1435.0022
## 9 5123.10203 5736.0080
## 10 41.94704 99.5467
## Tumor_plus_NK_timepoint_2-S_102_R0108 Tumor_plus_NK_timepoint_2-S_103_R0108
## 1 2497.46950 3970.89731
## 2 257.96566 393.79609
## 3 99.21820 230.91655
## 4 419.01959 561.65120
## 5 58.51867 146.73088
## 6 31.59401 83.58817
## 7 224.34658 568.56385
## 8 377.55128 1663.47315
## 9 4886.22860 5833.79498
## 10 59.89536 144.75629
## Tumor_plus_NK_timepoint_2-S_104_R0108 Tumor_plus_NK_timepoint_2-S_25_R0114_L5311
## 1 4100.50705 2451.84810
## 2 442.56640 275.66064
## 3 141.80837 24.51542
## 4 678.64953 438.98600
## 5 212.31066 53.73839
## 6 67.29958 5.03997
## 7 520.66380 152.25959
## 8 1819.02296 112.74574
## 9 6454.32469 6012.40889
## 10 140.85471 48.22130
## Tumor_plus_NK_timepoint_2-S_26_R0114_L5311 Tumor_plus_NK_timepoint_2-S_27_R0114_L5311
## 1 1413.182567 2255.761783
## 2 196.748054 230.346953
## 3 13.427171 22.182839
## 4 312.802010 398.735564
## 5 42.510684 23.607159
## 6 7.504856 8.796351
## 7 169.835020 135.128724
## 8 130.822084 87.631803
## 9 3255.278358 3864.183482
## 10 42.830920 12.685330
## Tumor_plus_NK_timepoint_2-S_28_R0114_L5311 Tumor_plus_NK_timepoint_2-S_29_R0114_L5311
## 1 1512.3967890 3662.44856
## 2 139.5622890 286.44692
## 3 57.1499529 142.65577
## 4 240.5679225 427.38939
## 5 31.4507777 128.38609
## 6 0.7673959 10.18098
## 7 119.9576377 369.55434
## 8 25.1898319 906.82660
## 9 3509.5858480 4351.23008
## 10 22.4132504 124.62080
## Tumor_plus_NK_timepoint_2-S_30_R0114_L5311 Tumor_plus_NK_timepoint_2-S_31_R0114_L5311
## 1 2026.09662 1379.749209
## 2 151.23444 166.924585
## 3 65.04070 64.058822
## 4 311.23758 310.013115
## 5 44.45756 29.130493
## 6 0.00000 9.202746
## 7 216.07682 122.625541
## 8 138.24193 50.223875
## 9 3933.19026 4127.948022
## 10 42.69281 32.439402
## Tumor_plus_NK_timepoint_2-S_32_R0114_L5311 Tumor_plus_NK_timepoint_2-S_33_R0114_L5311
## 1 1007.45324 1090.70706
## 2 121.56246 137.02984
## 3 22.86767 34.30605
## 4 299.26832 256.26347
## 5 12.45582 17.03743
## 6 11.72777 10.37472
## 7 85.44529 88.40889
## 8 22.68844 18.53216
## 9 3051.54335 3363.10199
## 10 34.69610 26.12183
## Tumor_plus_NK_timepoint_2-S_34_R0114_L5311 Tumor_plus_NK_timepoint_2-S_35_R0114_L5311
## 1 2545.25905 1889.68993
## 2 475.60652 375.51234
## 3 138.20748 91.00698
## 4 599.21008 413.73002
## 5 20.33720 16.11670
## 6 79.92778 25.93714
## 7 170.30992 117.15614
## 8 77.83998 52.85539
## 9 6938.32427 5117.73916
## 10 30.00380 25.41617
## Tumor_plus_NK_timepoint_2-S_36_R0114_L5311
## 1 1899.567509
## 2 397.353609
## 3 99.386914
## 4 458.410596
## 5 27.913123
## 6 9.535085
## 7 90.224031
## 8 45.286888
## 9 6247.357109
## 10 28.809331
## [ reached 'max' / getOption("max.print") -- omitted 14987 rows ]
metadata_heatmap <- as.data.frame(colData(dds_subset))
transf_object <- vsd_subset_filt
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_experiment) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_TpNK_tp2_vs_Tonly_tp1_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(experiment, cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_TpNK_tp2_vs_Tonly_tp1_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
# select top 20 and 10 gens from ab cd up and down regulated
AB_res_path <- "~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_AB/deg_TpNK_tp2_vs_Tonly_tp1_AB_results.xlsx"
CD_res_path <- "~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_vs_Tonly_tp1_CD/deg_TpNK_tp2_vs_Tonly_tp1_CD_results.xlsx"
AB_results <- openxlsx::read.xlsx(xlsxFile = AB_res_path, sheet = "results_signif")
CD_results <- openxlsx::read.xlsx(xlsxFile = CD_res_path, sheet = "results_signif")
AB_CD_intersect_genes <- intersect(AB_results$mgi_symbol, CD_results$mgi_symbol)
AB_res_subset <- AB_results[AB_results$mgi_symbol %in% AB_CD_intersect_genes,]
CD_res_subset <- CD_results[CD_results$mgi_symbol %in% AB_CD_intersect_genes,]
combined_AB_CD <- rbind.data.frame(AB_res_subset[,1:14], CD_res_subset[,1:14])
AB_CD_res_subset_UP <- combined_AB_CD[combined_AB_CD$log2FoldChange > 0,]
AB_CD_res_subset_DOWN <- combined_AB_CD[combined_AB_CD$log2FoldChange < 0,]
AB_CD_res_subset_UP <- AB_CD_res_subset_UP %>% arrange(padj)
AB_CD_res_subset_UP <- AB_CD_res_subset_UP[!duplicated(AB_CD_res_subset_UP$mgi_symbol),] %>% slice_head(n=20)
AB_CD_res_subset_DOWN <- AB_CD_res_subset_DOWN %>% arrange(padj)
AB_CD_res_subset_DOWN <- AB_CD_res_subset_DOWN[!duplicated(AB_CD_res_subset_DOWN$mgi_symbol),]%>% slice_head(n=10)
geneset <- c(AB_CD_res_subset_UP$mgi_symbol, AB_CD_res_subset_DOWN$mgi_symbol)
ensembl_gene_set <- ensembl2symbol_annot[which(ensembl2symbol_annot$mgi_symbol %in% geneset),]
ensembl_gene_set <- ensembl_gene_set[ match(geneset , ensembl_gene_set$mgi_symbol),]
indexes <- which(row.names(heatmap_counts_NObatch_deg_ord) %in% ensembl_gene_set$ensembl_id)
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[indexes,]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg_ord[ match(ensembl_gene_set$ensembl_id , row.names(heatmap_counts_NObatch_deg_ord)),]
rownames(heatmap_counts_NObatch_deg_ord) <- ensembl_gene_set$mgi_symbol
######
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c(EXP9.4 = "#00262E", EXP9.5 = "#005f73", EXP9.6 = "#0a9396", EXP9.7 = "#94d2bd"),
condition_tp = c(Tumor_only_timepoint_1 = "#DD3344", Tumor_plus_NK_timepoint_1 = "#FF9F1C", Tumor_plus_NK_timepoint_2 = "#553388"),
cell_line_label = c( A = "#E9D8A6", B = "#D9BE6D", C = "#676F54", D = "#395B50")
)
heatmap_corrected <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
cluster_cols = FALSE,
cluster_rows = FALSE,
show_rownames = TRUE,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
) # <img src=“/home/rstudio/workspace/nk_tum_immunoediting_files/figure-html/03”try to plot in the big, integrated DATASET”-1.png” width=“672” />
ggsave(filename = paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_AB_CD_comparison/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.eps"),
plot=heatmap_corrected,
width = 30, height = 14, units = "cm")
ggsave(filename = paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_TpNK_tp2_AB_CD_comparison/", "TpNK_tp2_vs_Tonly_tp1_heatmap_corrected_small_shared_gene_set_padj.png"),
plot=heatmap_corrected,
width = 30, height = 14, units = "cm")# Loading the required dependencies:
import::from(.from = variancePartition, fitExtractVarPartModel, sortCols, plotVarPart)
import::from(.from = doParallel, registerDoParallel)
import::from(.from = parallel, makeCluster, stopCluster)
# import utils scripts
import::from(.from = here::here("utils/filterDatasets.R"), "filterDatasets", .character_only=TRUE) # used for filtering
import::from(.from = here::here("utils/rnaSelectTopVarGenes.R"), "rnaSelectTopVarGenes", .character_only=TRUE)
import::from(.from = here::here("utils/edaFunctions.R"), "varPartitionEstimate", "generatePCA", "pcaExtractVariance", "pcaPlotVariance", "pcaCorrPCs", "pcaCorrPCsPlot", .character_only=TRUE)
import::from(.from = here::here("utils/generateResults.R"), "generateResults_upd", .character_only=TRUE) #
import::from(.from = here::here("utils/plotDegResults.R"), "plotVolcano","plotVolcano_repel", .character_only=TRUE)
import::from(.from = here::here("utils/generateEnsemblAnnotation.R"), "generateEnsemblAnnotation", .character_only=TRUE) # used for filtering#project setup
# loading config file ----
config_file <- file.path( "nk_tum_immunoediting_config.yaml") # params$config_file; params not found? ; change to project_config.yaml
config <- yaml::read_yaml(config_file)
#config <- yaml::read_yaml(params$config_file)
experiment_name="nk_tum_immunoedit_complete_ab2" # this can be a subset analysis - e.g. just batch1,...
experiment_design="1" # used only to construct initial dds object
abs_filt_samples=3
# parameters for annotation
biomart_host = "http://nov2020.archive.ensembl.org"
biomart_Ens_version = "Ensembl Genes 102"
biomart_dataset="mmusculus_gene_ensembl"
# directories
output_dir = paste0("./results_dir", "/",experiment_name,"/")
dir.create(output_dir)
# importing only key functions that are actually used - not to polute namespace!
import::from(.from = readr, read_csv, cols)
import::from(magrittr, "%>%")
import::from(dplyr, mutate, select, filter, rename, arrange, desc, group_by, summarise, ungroup) # dplyr_mutate = mutate
import::from(purrr, map)
import::from(future, plan, multisession, sequential)
import::from(furrr, furrr_options, future_map2)
import::from(ggplot2, .all=TRUE) # importing all as there is too many
import::from(grid, gpar) # needed in complexheatmap
import::from(kableExtra, kable_styling, kbl)
import::from(.from = GenomicFeatures, makeTxDbFromGFF)
import::from(.from = AnnotationDbi, annot_db_keys = keys, annot_db_select = select)
import::from(.from = tximport, tximport)
import::from(.from = DESeq2, .all=TRUE)
#table(metadata$condition_tp, metadata$experiment)# Create a list of comparisons:
# Full list
# list_of_comparisons <- list(c("Tumor_plus_WT_NK_timepoint_2", "Tumor_only_timepoint_2"),
# c("Tumor_plus_IFNg_timepoint_2", "Tumor_only_timepoint_2"),
# c("Tumor_plus_PrfKO_NK_timepoint_2", "Tumor_only_timepoint_2"),
# c("Tumor_plus_WT_NK_timepoint_2", "Tumor_plus_IFNg_timepoint_2"),
# c("Tumor_plus_WT_NK_timepoint_2", "Tumor_plus_PrfKO_NK_timepoint_2"),
# c("Tumor_only_timepoint_0", "Tumor_only_timepoint_1"),
# c("Tumor_only_timepoint_0", "Tumor_only_timepoint_2"),
# c("Tumor_only_timepoint_1", "Tumor_only_timepoint_2"),
# c("Tumor_plus_WT_NK_timepoint_2", "Tumor_only_timepoint_1"))
list_of_comparisons <- list(c("Tumor_plus_WT_NK_timepoint_2", "Tumor_only_timepoint_2"),
c("Tumor_plus_IFNg_timepoint_2", "Tumor_only_timepoint_2"),
c("Tumor_plus_PrfKO_NK_timepoint_2", "Tumor_only_timepoint_2"))
experiment_name="nk_tum_immunoedit_complete"
output_dir <- "/home/rstudio/workspace/results_dir/nk_tum_immunoedit_complete/"
# Loading the data
# raw and filtered dds
precomputed_objects_filename <- paste0(experiment_name, "_dds_objects.RData") #"nk_tum_immunoedit_complete_dds_objects.RData"
precomputed_objects_file <- file.path(output_dir, precomputed_objects_filename)
# log2 and vsd transformed
precomputed_transf_objects_filename <- "nk_tum_immunoedit_complete_log2_vsd_filt_objects.RData"
precomputed_transf_objects_file <- file.path(output_dir, precomputed_transf_objects_filename)
# check if object loaded if not load
if(!exists(precomputed_objects_file)){
load(precomputed_objects_file)
load(precomputed_transf_objects_file)
}
# defining parameters
abs_filt_samples=3
padj_cutoff = 0.05
log2FC_cutoff = 0.58 #(FC=1.5); log2FC=1.0 # (FC=2)
var_expl_needed <- 0.6 # at least 60% variance explained needed
experiment_subset <- c("EXP9.8") #we use only this experiment
if(exists("deg_design")) {rm(deg_design)}
deg_design <- as.formula("~ cell_line_label + condition_tp") #Experiment is missing here, since we have only EXP9.8
# Loading MsigDB geneset collections ----
gs_hallmark <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("H"), clean=TRUE)
gs_C2_kegg <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C2"), subcategory = "CP:KEGG", clean=TRUE)
gs_C5_GOBP <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:BP", clean=TRUE)
gs_C5_GOCC <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:CC", clean=TRUE)
gs_C5_GOMF <- hypeR::msigdb_gsets(species = "Mus musculus", category = c("C5"), subcategory = "GO:MF", clean=TRUE)
genes_of_interest <- list("Denn2b"="ENSMUSG00000031024",
"Gbp6"="ENSMUSG00000104713",
"H2-K1"="ENSMUSG00000061232",
"Hs3st2"="ENSMUSG00000046321",
"Htra3"="ENSMUSG00000029096",
"Ifi27"="ENSMUSG00000064215",
"Igtp"="ENSMUSG00000078853",
"Irf1"="ENSMUSG00000018899",
"Lgals3bp"="ENSMUSG00000033880",
"Ly6a"="ENSMUSG00000075602",
"Plaat3"="ENSMUSG00000060675",
"Rtp4"="ENSMUSG00000033355",
"Serpina3f"="ENSMUSG00000066363",
"Stat1"="ENSMUSG00000026104")
genes_of_interest_exp9.8 <- list("H2-Eb1" = "ENSMUSG00000060586",
"Slc16a3"= "ENSMUSG00000025161",
"H2-Aa" = "ENSMUSG00000036594",
"Bnip3" = "ENSMUSG00000078566",
"Il1r2" = "ENSMUSG00000026073",
"Dapk2" = "ENSMUSG00000032380",
"Zfhx3" = "ENSMUSG00000038872",
"Dpp7" = "ENSMUSG00000026958")
#specifically for the publication
genes_of_interest_exp9.4_9.7 <- list('Ifi44' ="ENSMUSG00000028037",
'Ccl5' ="ENSMUSG00000035042",
'Iigp1' ="ENSMUSG00000054072",
'Serpina3f' ="ENSMUSG00000066363",
'Ly6a' ="ENSMUSG00000075602",
'Plaat3' ="ENSMUSG00000060675",
'Lgals3bp' ="ENSMUSG00000033880",
'Ifi27' ="ENSMUSG00000064215",
'Gbp6' ="ENSMUSG00000104713",
'Rtp4' ="ENSMUSG00000033355",
'Stat1' ="ENSMUSG00000026104",
'Tap1' ="ENSMUSG00000037321",
'H2-K1' ="ENSMUSG00000061232",
'B2m' ="ENSMUSG00000060802",
'Igtp' ="ENSMUSG00000078853",
'Gtf2ird1' ="ENSMUSG00000023079",
'Cdc42ep4' ="ENSMUSG00000041598",
'Hoxb6' ="ENSMUSG00000000690",
'Bfsp2' ="ENSMUSG00000032556",
'Sox6' ="ENSMUSG00000051910",
'Tgm2' ="ENSMUSG00000037820",
'Adprhl1' ="ENSMUSG00000031448")# this will be a counter
cid <- 0
#Star of the loop
for (comparison_i in list_of_comparisons){
cid <- cid + 1
#determine the name of the comparison
deg_name <- paste0(cid, comparison_i[1], comparison_i[2]) #"c1_Tumor_plus_WT_NK_timepoint_2_VS_Tumor_only_timepoint_2"
deg_dir <- file.path(output_dir, paste0("deg_", deg_name, "/"))
dir.create(deg_dir)
#this is the tow sample that we are going to compare between
condition_tp_subset <- comparison_i
# preparing dds and vsd objects for the main project comparison
precomputed_objects_deg_filename <- paste0(deg_name, "_dds_objects.RData")
precomputed_objects_deg_file <- file.path(deg_dir, precomputed_objects_deg_filename)
precomputed_transf_objects_deg_filename <- paste0(deg_name, "_log2_vsd_filt_objects.RData")
precomputed_transf_objects_deg_file <- file.path(deg_dir, precomputed_transf_objects_deg_filename)
# just in case remove any existing subsets
if(exists("dds_subset")) {rm(dds_subset)}
if(exists("dds_subset_filt")) {rm(dds_subset_filt)}
if(exists("vsd_subset")) {rm(vsd_subset)}
if(!file.exists(precomputed_objects_deg_file)){
cat("RNA objects file '", precomputed_objects_deg_filename, "' does not exist in the OUTPUT_DIR...\n")
cat("generating", precomputed_objects_deg_filename, "\n")
dds_subset <- dds_filt[ , (dds_filt$condition_tp %in% condition_tp_subset) & (dds_filt$experiment %in% experiment_subset)]
# Dropping levels
dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
table(dds_subset$condition_tp)
dds_subset$experiment <- droplevels(dds_subset$experiment)
table(dds_subset$experiment)
dds_subset$experiment <- droplevels(dds_subset$experiment)
dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
# filtering lowly expressed genes
dds_subset_filt <- filterDatasets(dds_subset,
abs_filt = TRUE,
abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size
design(dds_subset_filt) <- deg_design
dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
cat("saving...", precomputed_objects_file, "\n")
cat("...into:", output_dir, "\n")
save(dds_subset, dds_subset_filt, ensemblAnnot, file = precomputed_objects_deg_file)
} else{
cat("RNA objects file '", precomputed_objects_deg_filename, "' exist...loading\n")
load(precomputed_objects_deg_file)
}
#transformation after filtering
if(!file.exists(precomputed_transf_objects_deg_file)){
# transformation
log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info; blind = TRUE for QC
save(log2_norm_subset_filt, vsd_subset_filt,
file = precomputed_transf_objects_deg_file)
} else{
load(precomputed_transf_objects_deg_file)
}
rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previous runs
key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate)
key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)),
strata = c("condition_tp"),
data = key_metadata_subset)
tableone::kableone(key_variables_tableOne$CatTable,
caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
kableExtra::kable_material(c("striped", "hover"))
transf_object <- vsd_subset_filt
#Before removing Batch Effect
pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object,
cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000) +
ggtitle(paste(comparison_i, "Comparison. Dataset Before Batch Correction"))
#plot(pca_deg_plotCellLabel)
# Remove batch effect (only for the cell_line_label)
transf_batch_NObatch_cell_label <- vsd_subset_filt
transf_batch_NObatch_cell_label_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_cell_label),
transf_batch_NObatch_cell_label$cell_line_label)
SummarizedExperiment::assay(transf_batch_NObatch_cell_label) <- transf_batch_NObatch_cell_label_count
pca_deg_NObatch_cell_label <- generatePCA(transf_object = transf_batch_NObatch_cell_label,
cond_interest_varPart = c("condition_tp", "cell_line_label"),
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000) +
ggtitle(paste(comparison_i, "Comparison. Removed batchEffect - cell_line"))
#plot(pca_deg_NObatch_cell_label)
PCA_batch_highlightCellLines_merged <- ggpubr::ggarrange(pca_deg_plotCellLabel,
pca_deg_NObatch_cell_label,
common.legend = TRUE)
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "batch_correction.png"),
plot=PCA_batch_highlightCellLines_merged,
width = 40, height = 20, units = "cm")
#Generate results
deg_comparison_results <- generateResults_upd(dds_object = dds_subset_filt,
coeff_name = resultsNames(dds_subset_filt)[3],
cond_numerator = comparison_i[1],
cond_denominator = comparison_i[2],
cond_variable="condition_tp")
#export it as xlsx table
openxlsx::write.xlsx(deg_comparison_results, file = file.path(deg_dir, paste0("deg", comparison_i[1], '_vs_', comparison_i[2], "_results.xlsx")))
#generating GOBP GOCC GOMF
deg_comparison_results_enrichment_C5_GOBP <- hypeR::hypeR(signature = deg_comparison_results$results_signif$mgi_symbol,
genesets = gs_C5_GOBP,
test="hypergeometric",
background=nrow(dds_subset_filt))
deg_comparison_results_enrichment_C5_GOCC <- hypeR::hypeR(signature = deg_comparison_results$results_signif$mgi_symbol,
genesets = gs_C5_GOCC,
test="hypergeometric",
background=nrow(dds_subset_filt))
deg_comparison_results_enrichment_C5_GOMF <- hypeR::hypeR(signature = deg_comparison_results$results_signif$mgi_symbol,
genesets = gs_C5_GOMF,
test="hypergeometric",
background=nrow(dds_subset_filt))
# hypeR::hyp_show(hyp_obj$data$clA, simple = FALSE)
hypeR::hyp_to_excel(deg_comparison_results_enrichment_C5_GOBP, file_path=file.path(deg_dir, paste0(comparison_i[1], comparison_i[2],"_enrichment_C5_GOBP.xlsx")))
hypeR::hyp_to_excel(deg_comparison_results_enrichment_C5_GOCC, file_path=file.path(deg_dir, paste0(comparison_i[1], comparison_i[2],"_enrichment_C5_GOCC.xlsx")))
hypeR::hyp_to_excel(deg_comparison_results_enrichment_C5_GOMF, file_path=file.path(deg_dir, paste0(comparison_i[1], comparison_i[2],"_enrichment_C5_GOMF.xlsx")))
# plotting enrichment results ----
deg_comparison_results_enrichment_C5_GOBP_plot <- hypeR::hyp_dots(deg_comparison_results_enrichment_C5_GOBP, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title=paste0("GOBP: ",comparison_i[1], comparison_i[2]))
deg_comparison_results_enrichment_C5_GOCC_plot <- hypeR::hyp_dots(deg_comparison_results_enrichment_C5_GOCC, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title=paste0("GOCC: ",comparison_i[1], comparison_i[2]))
deg_comparison_results_enrichment_C5_GOMF_plot <- hypeR::hyp_dots(deg_comparison_results_enrichment_C5_GOMF, merge=TRUE, fdr=0.05, top = 20, abrv=70, val="fdr", title=paste0("GOMF: ",comparison_i[1], comparison_i[2]))
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_GOBP_plot.png"),
plot=deg_comparison_results_enrichment_C5_GOBP_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_GOCC_plot.png"),
plot=deg_comparison_results_enrichment_C5_GOCC_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_GOMF_plot.png"),
plot=deg_comparison_results_enrichment_C5_GOMF_plot,
width = 20, height = 20, units = "cm")
# volcano plot
deg_comparison_results_volcano_plot <- plotVolcano_repel(dds_results_obj = deg_comparison_results$results_all,
genes_of_interest = genes_of_interest,
genes_of_interest2 = genes_of_interest_exp9.8,
plot_title = paste0(comparison_i[1], ' vs ', comparison_i[2]),
max_overlaps = 30)
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_volcano_plot.png"),
plot=deg_comparison_results_volcano_plot,
width = 20, height = 20, units = "cm")
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_volcano_plot.eps"),
plot=deg_comparison_results_volcano_plot,
width = 20, height = 20, units = "cm")
print(deg_comparison_results_volcano_plot)
# heatmap and heatmap batch corrected - corrected
save_pheatmap_pdf <- function(x, filename, width=7, height=7) {
stopifnot(!missing(x))
stopifnot(!missing(filename))
pdf(filename, width=width, height=height)
grid::grid.newpage()
grid::grid.draw(x$gtable)
dev.off()
}
metadata_heatmap <- as.data.frame(colData(dds_subset))
heatmap_counts <- SummarizedExperiment::assay(transf_object)
heatmap_counts_NObatch <- SummarizedExperiment::assay(transf_batch_NObatch_cell_label) # removed xperiment batch effects! ? use experimen+cell_line removed???
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% deg_comparison_results$results_signif$ensembl_id, ]
heatmap_counts_NObatch_deg <- heatmap_counts_NObatch[rownames(heatmap_counts_NObatch) %in% deg_comparison_results$results_signif$ensembl_id, ]
annotation_col <- metadata_heatmap %>%
dplyr::select(cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_NObatch_deg_ord <- heatmap_counts_NObatch_deg[, match(rownames(annotation_col), colnames(heatmap_counts_NObatch_deg))]
ensembl2symbol_annot <- deg_comparison_results$results_all %>%
dplyr::select(ensembl_id, mgi_symbol)
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c(EXP9.8 = "grey")
)
# , filename = paste0(deg_TpNK_tp2_vs_Tonly_tp1_DIR, "tp2_vs_tp1_inclEXP9_7_heatmap_intersect_54genes.pdf")
heatmap_result <- pheatmap::pheatmap(heatmap_counts_NObatch_deg_ord,
main = "Heatmap of signif. DEG - removed cell line BatchEffect",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
)
save_pheatmap_pdf(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap_batch_eff_removed.pdf"),
x=heatmap_result,
width = 10, height = 10)
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap_batch_eff_removed.png"),
plot=heatmap_result,
width = 10, height = 10, units = "in")
#Heatmap not corrected for the batch effect
heatmap_result <- pheatmap::pheatmap(heatmap_counts_deg_ord,
main = "Heatmap of signif. DEG",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = FALSE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
)
save_pheatmap_pdf(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap.pdf"),
x=heatmap_result,
width = 10, height = 10)
ggsave(filename = paste0(deg_dir, comparison_i[1], "_VS_",comparison_i[2], "_heatmap.png"),
plot=heatmap_result,
width = 10, height = 10, units = "in")
} ## RNA objects file ' 1Tumor_plus_WT_NK_timepoint_2Tumor_only_timepoint_2_dds_objects.RData ' exist...loading
## RNA objects file ' 2Tumor_plus_IFNg_timepoint_2Tumor_only_timepoint_2_dds_objects.RData ' exist...loading
## RNA objects file ' 3Tumor_plus_PrfKO_NK_timepoint_2Tumor_only_timepoint_2_dds_objects.RData ' exist...loading
Deg1_filename <- paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_1Tumor_plus_WT_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_WT_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg2_filename <- paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_2Tumor_plus_IFNg_timepoint_2Tumor_only_timepoint_2/degTumor_plus_IFNg_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg3_filename <- paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_3Tumor_plus_PrfKO_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_PrfKO_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg1 <- openxlsx::read.xlsx(Deg1_filename,sheet = 'results_signif')
Deg1_ens <- Deg1[,c('mgi_symbol', 'ensembl_id')]
Deg1 <- Deg1$mgi_symbol
Deg2 <- openxlsx::read.xlsx(Deg2_filename,sheet = 'results_signif')
Deg2_ens <- Deg2[, c('mgi_symbol', 'ensembl_id')]
Deg2 <- Deg2$mgi_symbol
Deg3 <- openxlsx::read.xlsx(Deg3_filename,sheet = 'results_signif')
Deg3_ens <- Deg3[, c('mgi_symbol', 'ensembl_id')]
Deg3 <- Deg3$mgi_symbol
Deg_list <- list("Tmr+WT_NK TP2 vs T only TP2" = Deg1,
"Tmr+IFNg TP2 vs T only TP2" = Deg2,
"Tmr+PrfKO NK TP2 vs T only TP2" = Deg3)
library(ggvenn)
ven_diag <- ggvenn(Deg_list)
ven_diag#order the genes for the further Heatmap
Deg12_intersect <- intersect(Deg1, Deg2)
Deg123_intersect <- intersect(Deg12_intersect, Deg3)
Deg23_intersect <- intersect(Deg2, Deg3)
Deg31_intersect <- intersect(Deg3, Deg1)
Deg12_intersect <- Deg12_intersect[!(Deg12_intersect %in% Deg123_intersect)]
Deg23_intersect <- Deg23_intersect[!(Deg23_intersect %in% Deg123_intersect)]
Deg31_intersect <- Deg31_intersect[!(Deg31_intersect %in% Deg123_intersect)]
Deg1_unique <- Deg1[!(Deg1 %in% Deg12_intersect |
Deg1 %in% Deg31_intersect |
Deg1 %in% Deg123_intersect)]
Deg2_unique <- Deg2[!(Deg2 %in% Deg12_intersect |
Deg2 %in% Deg23_intersect |
Deg2 %in% Deg123_intersect)]
Deg3_unique <- Deg3[!(Deg3 %in% Deg31_intersect |
Deg3 %in% Deg23_intersect |
Deg3 %in% Deg123_intersect)]
List_of_genes_ordered <- c(Deg31_intersect,
Deg1_unique,
Deg12_intersect,
Deg2_unique,
Deg23_intersect,
Deg3_unique,
Deg123_intersect)
Deg_ens <- rbind(Deg1_ens, Deg2_ens, Deg3_ens)
Deg_ens <- unique(Deg_ens)
Deg_ens_ord <- Deg_ens[match(List_of_genes_ordered, Deg_ens$mgi_symbol),]#These are the data that will be used for the heatmap
list_of_comparisons_venn_heatmap <- c("Tumor_plus_WT_NK_timepoint_2",
"Tumor_plus_IFNg_timepoint_2",
"Tumor_plus_PrfKO_NK_timepoint_2",
"Tumor_only_timepoint_2")
experiment_subset <- c("EXP9.8")
dds_subset_venn_heatmap <- dds_filt[ , (dds_filt$condition_tp %in% list_of_comparisons_venn_heatmap) & (dds_filt$experiment %in% experiment_subset)]
metadata_heatmap <- as.data.frame(colData(dds_subset_venn_heatmap))
heatmap_counts <- SummarizedExperiment::assay(dds_subset_venn_heatmap)
heatmap_counts_deg <- heatmap_counts[rownames(heatmap_counts) %in% Deg_ens_ord$ensembl_id,]
annotation_col <- metadata_heatmap %>%
dplyr::select(cell_line_label, condition_tp) %>% # condition, timepoint_cell_harvesting,
dplyr::arrange(condition_tp, cell_line_label)
heatmap_counts_deg_ord <- heatmap_counts_deg[, match(rownames(annotation_col), colnames(heatmap_counts_deg))]
heatmap_counts_deg_ord <- heatmap_counts_deg_ord[match(Deg_ens_ord$ensembl_id, rownames(heatmap_counts_deg_ord)),]
rownames(heatmap_counts_deg_ord) == Deg_ens_ord$ensembl_id## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [27] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [53] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [79] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [105] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [131] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [157] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [183] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [209] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [235] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [261] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [287] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [313] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [339] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [365] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [391] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [417] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [443] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [469] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [495] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [521] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [547] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [573] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [599] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [625] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [651] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [677] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [703] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [729] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
rownames(heatmap_counts_deg_ord) <- Deg_ens_ord$mgi_symbol
color.scheme <- rev(RColorBrewer::brewer.pal(8,"RdBu")) # generate the color scheme to use
ann_colors = list(
experiment = c(EXP9.4 = "#4682B4", EXP9.5 = "#7846B4", EXP9.6 = "#B47846", EXP9.7 = "#82B446", EXP9.8 = "grey")
)
heatmap_result <- pheatmap::pheatmap(heatmap_counts_deg_ord,
main = "Heatmap of signif. DEG",
scale = "row",
annotation_col = annotation_col,
annotation_colors = ann_colors,
#annotation_row = row_annot_symbols,
show_colnames = FALSE,
show_rownames = TRUE,
cluster_cols = FALSE,
cluster_rows = T,
annotation_names_row = TRUE,
#cluster_rows = counts_deg_ord_row_cor_hclust,
color = color.scheme,
fontsize = 10, fontsize_row = 10 #height=10, cellwidth = 11, cellheight = 11
)# This heatmap doesn't look so great.
# save the DEG list:
List_of_genes_ordered_forxlsx <- list("Deg1_T+WT_NK_TP2_VS_To_TP2" = Deg1,
"Deg2_T+IFNg_TP2_VS_To_TP2" = Deg2,
"Deg3_T+PrfKO_NK_TP2_VS_To_TP2" = Deg3,
"Deg31_exlusive_intersect" = Deg31_intersect,
"Deg1_unique" = Deg1_unique,
"Deg12_exlusive_intersect" = Deg12_intersect,
"Deg2_unique" = Deg2_unique,
"Deg23_exlusive_intersect" = Deg23_intersect,
"Deg3_unique" = Deg3_unique,
"Deg123_intersect" = Deg123_intersect)
openxlsx::write.xlsx(List_of_genes_ordered_forxlsx,
"~/workspace/results_dir/nk_tum_immunoedit_complete/overlaping_genes_deg_1_2_3.xlsx")
# adding supplementary information (p-val, L2FC, etc for genes)
# read the DEG files again
Deg1_xlsx <- openxlsx::read.xlsx(Deg1_filename,sheet = 'results_signif')
Deg1_xlsx <- Deg1_xlsx %>% select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj")
Deg2_xlsx <- openxlsx::read.xlsx(Deg2_filename,sheet = 'results_signif')
Deg2_xlsx <- Deg2_xlsx %>% select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj")
Deg3_xlsx <- openxlsx::read.xlsx(Deg3_filename,sheet = 'results_signif')
Deg3_xlsx <- Deg3_xlsx %>% select("ensembl_id", "mgi_symbol", "mgi_description", "log2FoldChange", "padj")
List_of_genes_ordered_forxlsx_sup <- List_of_genes_ordered_forxlsx
List_of_genes_ordered_forxlsx_sup$`Deg1_T+WT_NK_TP2_VS_To_TP2` <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$`Deg1_T+WT_NK_TP2_VS_To_TP2`, Deg1_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$`Deg2_T+IFNg_TP2_VS_To_TP2` <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$`Deg2_T+IFNg_TP2_VS_To_TP2`, Deg2_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$`Deg3_T+PrfKO_NK_TP2_VS_To_TP2` <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$`Deg3_T+PrfKO_NK_TP2_VS_To_TP2`, Deg3_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$Deg1_unique <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg1_unique, Deg1_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$Deg2_unique <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg2_unique, Deg2_xlsx$mgi_symbol),]
List_of_genes_ordered_forxlsx_sup$Deg3_unique <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg3_unique, Deg3_xlsx$mgi_symbol),]
Deg31_excl <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg31_exlusive_intersect, Deg3_xlsx$mgi_symbol),] %>%
rename("DEG3_log2FoldChange" = "log2FoldChange", "DEG3_padj" = "padj")
Deg13_excl <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg31_exlusive_intersect, Deg1_xlsx$mgi_symbol),] %>%
select("log2FoldChange", "padj") %>%
rename("DEG1_log2FoldChange" = "log2FoldChange", "DEG1_padj" = "padj")
List_of_genes_ordered_forxlsx_sup$Deg31_exlusive_intersect <- cbind(Deg31_excl, Deg13_excl)
Deg21_excl <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect, Deg2_xlsx$mgi_symbol),] %>%
rename("DEG2_log2FoldChange" = "log2FoldChange", "DEG2_padj" = "padj")
Deg12_excl <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect, Deg1_xlsx$mgi_symbol),] %>%
select("log2FoldChange", "padj") %>%
rename("DEG1_log2FoldChange" = "log2FoldChange", "DEG1_padj" = "padj")
List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect <- cbind(Deg21_excl, Deg12_excl)
Deg23_excl <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg23_exlusive_intersect, Deg2_xlsx$mgi_symbol),] %>%
rename("DEG2_log2FoldChange" = "log2FoldChange", "DEG2_padj" = "padj")
Deg32_excl <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg23_exlusive_intersect, Deg3_xlsx$mgi_symbol),] %>%
select("log2FoldChange", "padj") %>%
rename("DEG3_log2FoldChange" = "log2FoldChange", "DEG3_padj" = "padj")
List_of_genes_ordered_forxlsx_sup$Deg12_exlusive_intersect <- cbind(Deg23_excl, Deg32_excl)
Deg123_1 <- Deg1_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg123_intersect, Deg1_xlsx$mgi_symbol),] %>%
rename("DEG1_log2FoldChange" = "log2FoldChange", "DEG1_padj" = "padj")
Deg123_2 <- Deg2_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg123_intersect, Deg2_xlsx$mgi_symbol),] %>%
select("log2FoldChange", "padj") %>%
rename("DEG2_log2FoldChange" = "log2FoldChange", "DEG2_padj" = "padj")
Deg123_3 <- Deg3_xlsx[match(List_of_genes_ordered_forxlsx_sup$Deg123_intersect, Deg3_xlsx$mgi_symbol),] %>%
select("log2FoldChange", "padj") %>%
rename("DEG3_log2FoldChange" = "log2FoldChange", "DEG3_padj" = "padj")
List_of_genes_ordered_forxlsx_sup$Deg123_intersect <- cbind(Deg123_1, Deg123_2, Deg123_3)
openxlsx::write.xlsx(List_of_genes_ordered_forxlsx_sup,
"~/workspace/results_dir/nk_tum_immunoedit_complete/overlaping_genes_deg_1_2_3_with_extra.xlsx")#Tumor_plus_WT_NK_timepoint_2 Tumor_plus_IFNg_timepoint_2 Tumor_plus_PrfKO_NK_timepoint_2 Tumor_only_timepoint_2
save_dir <- "~/workspace/results_dir/EXP9.8_publication/"
dir.create(save_dir)
condition_tp_subset <- c("Tumor_plus_WT_NK_timepoint_2",
"Tumor_plus_IFNg_timepoint_2",
"Tumor_plus_PrfKO_NK_timepoint_2",
"Tumor_only_timepoint_2")
dds_subset <- dds_filt[ , (dds_filt$condition_tp %in% condition_tp_subset) & (dds_filt$experiment %in% experiment_subset)]
# Dropping levels
dds_subset$condition_tp <- droplevels(dds_subset$condition_tp)
table(dds_subset$condition_tp)##
## Tumor_only_timepoint_2 Tumor_plus_WT_NK_timepoint_2 Tumor_plus_IFNg_timepoint_2 Tumor_plus_PrfKO_NK_timepoint_2
## 6 6 6 6
dds_subset$experiment <- droplevels(dds_subset$experiment)
table(dds_subset$experiment)##
## EXP9.8
## 24
dds_subset$experiment <- droplevels(dds_subset$experiment)
dds_subset$cell_line_label <- droplevels(dds_subset$cell_line_label)
# filtering lowly expressed genes
dds_subset_filt <- filterDatasets(dds_subset,
abs_filt = TRUE,
abs_filt_samples = abs_filt_samples) # at least in N samples, which is a smallest group size## Original dds object samples: 24 genes: 15430
## Minimum number of samples with expression: 3
## Number of filtered genes: 3228
## Filtered dds object has samples: 24 genes: 12202
design(dds_subset_filt) <- deg_design
dds_subset_filt <- DESeq2::estimateSizeFactors(dds_subset_filt)
dds_subset_filt <- DESeq2::DESeq(dds_subset_filt) # do not replace outliers based on replicates
log2_norm_subset_filt <- DESeq2::normTransform(dds_subset_filt)
vsd_subset_filt <- DESeq2::vst(dds_subset_filt, blind = FALSE) # using blind=FALSE utilize design info;
rm(key_metadata_subset, key_variables_tableOne) # in case this exists from previous runs
key_metadata_subset <- as.data.frame(colData(dds_subset)) %>%
dplyr::select(experiment, condition_tp, cell_line_label, technical_replicate)
key_variables_tableOne <- tableone::CreateTableOne(vars = colnames(dplyr::select(key_metadata_subset, -condition_tp)),
strata = c("condition_tp"),
data = key_metadata_subset)
tableone::kableone(key_variables_tableOne$CatTable,
caption = "Overview of number of samples in different categories (experiment, condition,...).") %>%
kableExtra::kable_material(c("striped", "hover"))| Tumor_only_timepoint_2 | Tumor_plus_WT_NK_timepoint_2 | Tumor_plus_IFNg_timepoint_2 | Tumor_plus_PrfKO_NK_timepoint_2 | p | test | |
|---|---|---|---|---|---|---|
| n | 6 | 6 | 6 | 6 | ||
| experiment = EXP9.8 (%) | 6 (100.0) | 6 (100.0) | 6 (100.0) | 6 (100.0) | NA | |
| cell_line_label = D (%) | 3 ( 50.0) | 3 ( 50.0) | 3 ( 50.0) | 3 ( 50.0) | 1.000 | |
| technical_replicate (%) | 1.000 | |||||
| 1 | 2 ( 33.3) | 2 ( 33.3) | 2 ( 33.3) | 2 ( 33.3) | ||
| 2 | 2 ( 33.3) | 2 ( 33.3) | 2 ( 33.3) | 2 ( 33.3) | ||
| 3 | 2 ( 33.3) | 2 ( 33.3) | 2 ( 33.3) | 2 ( 33.3) |
transf_object <- vsd_subset_filt
#Before removing Batch Effect
pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object[,transf_object$cell_line_label == "A"],
cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000) +
ggtitle(paste( "PCA plot. EXP9.8 Cell line A")) +
scale_color_manual(values = c("#DD3344" ,"#553388", "#A3E7FC", "#26C485"))
plot(pca_deg_plotCellLabel)ggsave(filename = file.path(save_dir, "PCA_Plot_all_conditions_A.png"), pca_deg_plotCellLabel,
width = 20, height = 14, units = "cm")
pca_deg_plotCellLabel <- generatePCA(transf_object = transf_object[,transf_object$cell_line_label == "D"],
cond_interest_varPart = c("condition_tp", "cell_line_label", "experiment"),
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000) +
ggtitle(paste( "PCA plot. EXP9.8 Cell line D")) +
scale_color_manual(values = c("#DD3344" ,"#553388", "#A3E7FC", "#26C485"))+
scale_shape_manual(values = 17)
plot(pca_deg_plotCellLabel)ggsave(filename = file.path(save_dir, "PCA_Plot_all_conditions_D.png"), pca_deg_plotCellLabel,
width = 20, height = 14, units = "cm")
# Remove batch effect (only for the cell_line_label)
transf_batch_NObatch_cell_label <- vsd_subset_filt
transf_batch_NObatch_cell_label_count <- limma::removeBatchEffect(SummarizedExperiment::assay(transf_batch_NObatch_cell_label),
transf_batch_NObatch_cell_label$cell_line_label)
SummarizedExperiment::assay(transf_batch_NObatch_cell_label) <- transf_batch_NObatch_cell_label_count
pca_deg_NObatch_cell_label <- generatePCA(transf_object = transf_batch_NObatch_cell_label,
cond_interest_varPart = c("condition_tp", "cell_line_label"),
color_variable = "condition_tp",
shape_variable = "cell_line_label",
ntop_genes = 1000) +
scale_color_manual(values = c("#DD3344" ,"#553388", "#A3E7FC", "#26C485"))+
ggtitle(paste( "PCA plot. EXP9.8 Batch corrected for cell_line"))
plot(pca_deg_NObatch_cell_label)ggsave(filename = file.path(save_dir, "PCA_Plot_all_conditions_batch_corrected_cell_lines.png"), pca_deg_NObatch_cell_label,
width = 20, height = 14, units = "cm")
##### Venn diagramms
Deg1_filename <- paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_1Tumor_plus_WT_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_WT_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg2_filename <- paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_2Tumor_plus_IFNg_timepoint_2Tumor_only_timepoint_2/degTumor_plus_IFNg_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg3_filename <- paste0("~/workspace/results_dir/nk_tum_immunoedit_complete/deg_3Tumor_plus_PrfKO_NK_timepoint_2Tumor_only_timepoint_2/degTumor_plus_PrfKO_NK_timepoint_2_vs_Tumor_only_timepoint_2_results.xlsx")
Deg1 <- openxlsx::read.xlsx(Deg1_filename,sheet = 'results_signif')
Deg1_pos <- Deg1[Deg1$log2FoldChange > 0, c('mgi_symbol') ]
Deg1_neg <- Deg1[Deg1$log2FoldChange < 0, c('mgi_symbol') ]
Deg2 <- openxlsx::read.xlsx(Deg2_filename,sheet = 'results_signif')
Deg2_pos <- Deg2[Deg2$log2FoldChange > 0, c('mgi_symbol') ]
Deg2_neg <- Deg2[Deg2$log2FoldChange < 0, c('mgi_symbol') ]
Deg2_neg <- Deg2_neg[Deg2_neg!=""]
Deg3 <- openxlsx::read.xlsx(Deg3_filename,sheet = 'results_signif')
Deg3_pos <- Deg3[Deg3$log2FoldChange > 0, c('mgi_symbol') ]
Deg3_neg <- Deg3[Deg3$log2FoldChange < 0, c('mgi_symbol') ]
Deg3_neg <- Deg3_neg[Deg3_neg!=""]
Deg_list_pos <- list("Tmr+WT_NK TP2 vs T only TP2" = Deg1_pos,
"Tmr+IFNg TP2 vs T only TP2" = Deg2_pos,
"Tmr+PrfKO NK TP2 vs T only TP2" = Deg3_pos)
Deg_list_neg <- list("Tmr+WT_NK TP2 vs T only TP2" = Deg1_neg,
"Tmr+IFNg TP2 vs T only TP2" = Deg2_neg,
"Tmr+PrfKO NK TP2 vs T only TP2" = Deg3_neg)
library(ggvenn)
ven_diag <- ggvenn(Deg_list_pos,fill_color = c("#553388", "#A3E7FC", "#26C485"))
ven_diagggsave(filename = file.path(save_dir, "venn_diag_positive_up_reg.png"), ven_diag,
width = 20, height = 20, units = "cm")
ven_diag <- ggvenn(Deg_list_neg,fill_color = c("#553388", "#A3E7FC", "#26C485"))
ven_diagggsave(filename = file.path(save_dir, "venn_diag_positive_down_reg.png"), ven_diag,
width = 20, height = 20, units = "cm")
Deg12_intersect_pos <- intersect(Deg1_pos, Deg2_pos)
Deg123_intersect_pos <- intersect(Deg12_intersect_pos, Deg3_pos)
Deg23_intersect_pos <- intersect(Deg2_pos, Deg3_pos)
Deg31_intersect_pos <- intersect(Deg3_pos, Deg1_pos)
Deg12_intersect_pos_scecif <- setdiff(Deg12_intersect_pos, Deg123_intersect_pos)
Deg23_intersect_pos_specif <- setdiff(Deg23_intersect_pos, Deg123_intersect_pos)
Deg31_intersect_pos_specif <- setdiff(Deg31_intersect_pos, Deg123_intersect_pos)
setdiff(Deg1_pos, c(Deg2_pos, Deg3_pos))## [1] "Bnip3" "Aldoa" "Pgk1" "Tpi1" "Ero1l" "Pkm" "Ddit4" "Pdk1" "Bnip3l" "Pgm1" "Fam162a" "Ulk3" "Pfkl"
## [14] "Rasal3" "Pfkp" "Ankrd37" "Kdm3a" "Map4" "Slc37a4" "Tgm2" "Trpc4" "Zfp810" "Cpne5" "Cox7a1" "Egln1" "Ak4"
## [27] "Aldoc" "Hsd11b1" "Zfp62" "Crlf1" "Mgarp" "Ropn1l" "Cd244a" "Gstm4" "Ercc1" "Izumo4"
setdiff(Deg2_pos, c(Deg1_pos, Deg3_pos))## [1] "Ifi47" "Tgtp2" "Gbp7" "Gbp4" "Gbp5" "Irgm2" "Tap1" "H2-K1"
## [9] "Jun" "Socs1" "Serpina3f" "Gm12250" "Irgm1" "B2m" "Tapbpl" "Batf2"
## [17] "Cd1d1" "9330175E14Rik" "F830016B08Rik" "Psmb8" "Serpina3g" "Gbp3" "Parp11" "Gm12216"
## [25] "Irf9" "AW112010" "Psmb10" "Ppa1" "Nlrc5" "Wars" "Lap3" "Ifitm3"
## [33] "Gm43302" "Parp9" "Dgcr6" "Il18bp" "Ube2l6" "Trafd1" "Tapbp" "Tmem140"
## [41] "Psap" "Nampt" "Casp1" "Trim12c" "Tfrc" "Rnf19b" "Gbp2b" "Ly6c1"
## [49] "Isg20" "Ctss" "Coa5" "Gpx1" "Slfn2" "Ifi44" "Psme1" "Dtx3l"
## [57] "Psmg4" "Bst2" "Gimap4" "P2ry14" "Ifitm1" "Isg15" "Tnfrsf26" "Ffar2"
## [65] "Prm1" "H2-D1" "Sp110" "Gm1966" "Psme2" "Trim30a" "Idnk" "Mpeg1"
## [73] "Pecam1" "Ifngr2" "Ccr2" "Tnfsf10" "Prodh" "Gm4486" "Ifi35" "Itm2b"
## [81] "Csf1" "Psmb9" "Samhd1" "Usp18" "Ogfr" "Fas" "Ifi209" "Rnf114"
## [89] "Ly6c2" "Oasl2" "Snx10" "Cd274" "Cfp" "Rsad2" "Plac8" "Psme2b"
## [97] "Guca1a" "Klf6" "Tmem33" "Erap1" "H2-T22" "Ctsc" "Aida" "Cask"
## [105] "Ms4a6c" "Myof" "Rmdn3" "BC051226" "Cemip2" "Nod1" "Gm12840" "Xaf1"
## [113] "H2-Q5" "Tgtp1" "Phc2" "Parp12" "Reps1" "Oas3" "Mapk9" "Rnase6"
## [121] "Glrx" "Vwa5a" "Il2rg" "Lgals3" "Ppp2r5b" "1600014C10Rik" "Ly6d" "Rsbn1l"
## [129] "Apol7b" "Nuak2" "Dio2" "Ciita" "Olfr753-ps1" "Fcgr3" "Nmi" "Snhg15"
## [137] "Tmtc4" "Apex2" "Trim34a" "Dkk3" "Gm20627" "Ica1" "Prdm1" "Serpinb6a"
## [145] "Ifit1" "Cebpb" "Tmco4" "Cep89" "Ccl9" "Stat3" "Gm6545" "Plscr1"
## [153] "Mmp13" "Sema3g" "H2-DMb2" "Piwil2" "Uba7" "Ccrl2" "Chpf2" "Rasa4"
## [161] "Gda" "Arhgap25" "Lax1" "Ifit1bl1" "Parp14" "Irf7" "Gm49339" "Selenow"
## [169] "Tusc1" "Ryr2" "Atp8a1" "Sco1" "Gm48161" "Fbn1" "Rgs1" "Pon3"
## [177] "Marchf5" "Znrd2" "Srxn1" "Tspan2" "Gpr18" "Tmem50b" "Adar" "Gm50255"
## [185] "Osm" "Phf11a" "Gm10076" "Shb" "Il12rb1" "Trim12a" "H2-T23" "Igf1"
## [193] "Pgf" "Wsb2" "Ifi203" "Ifi213" "Scly" "Usf1" "Bcl3" "Alcam"
## [201] "Sh2d5" "Timm21" "Cd36" "Vsir" "Snhg5" "Cmpk2" "Minpp1" "Tmem138"
## [209] "Traf5" "Creb3" "Smg8" "Mat2b" "Sesn3" "Il15ra" "Ly6i" "Rpl17-ps8"
## [217] "Ifi206" "Themis2" "Tmem86a" "Parp10" "Gpatch11" "Zbtb32" "Oas1a" "Zfp422"
## [225] "Serpini1" "Dhx58" "Cxcr3" "BC147527" "Ifi211" "Ndufc1" "Hck" "Gm16701"
## [233] "Slc11a1" "Prkaa2" "Lsm8" "Aopep" "Snx14" "Inpp4a" "Hpgds" "Calhm6"
## [241] "Zfp846" "Rnf213" "Trim21" "Gpm6a" "Slc35e2" "Ssbp2" "Abtb2" "Ssbp1"
## [249] "Marchf1" "Mtf1" "Rps18-ps6" "H2-T24" "Bpnt1" "Nt5e" "Polr1e" "Coq10b"
## [257] "Cog1" "Rangrf" "Htatip2" "Dnase1l3" "Slc22a5" "Trmt1l" "Slc23a2" "H2-DMb1"
## [265] "Parp3" "Mettl17" "Lyrm1" "Igkc" "Rhbdf2" "Slc30a4" "9930104L06Rik" "Ccdc146"
## [273] "Gosr1" "Tmem106a" "Abhd15" "Cpne8" "Gm24265" "Erlin1" "Endod1" "Vps51"
## [281] "Nfkb1" "Aicda" "Slc26a6"
setdiff(Deg3_pos, c(Deg1_pos, Deg2_pos))## [1] "Rnaseh2a" "Dmrta2" "Tstd3" "Fastkd2" "Rad52" "Cox6a2" "Dpf1" "Tnnt2" "Abcb4" "Cd28" "Pmel" "Ltk"
## [13] "Zfp113" "Poli" "H2bc3" "Mapk12"
Deg12_intersect_neg <- intersect(Deg1_neg, Deg2_neg)
Deg123_intersect_neg <- intersect(Deg12_intersect_neg, Deg3_neg)
Deg23_intersect_neg <- intersect(Deg2_neg, Deg3_neg)
Deg31_intersect_neg <- intersect(Deg3_neg, Deg1_neg)
Deg12_intersect_neg_scecif <- setdiff(Deg12_intersect_neg, Deg123_intersect_neg)
Deg23_intersect_neg_specif <- setdiff(Deg23_intersect_neg, Deg123_intersect_neg)
Deg31_intersect_neg_specif <- setdiff(Deg31_intersect_neg, Deg123_intersect_neg)
setdiff(Deg1_neg, c(Deg2_neg, Deg3_neg))## [1] "Phip" "Ighg2b" "Zfp709" "Tlk2" "H2ac20" "Lbhd1" "Pogk" "Gm20186"
## [9] "Rgs10" "1300002E11Rik" "R3hdm2" "Gng4" "Rabgap1l" "Aicda" "Smcr8" "Tcf7l1"
## [17] "Ggps1" "Stag2" "2700038G22Rik" "Marcks" "Smpd3" "Gas7" "4930523C07Rik" "Samd8"
## [25] "Ago1" "Ik" "Rpph1"
setdiff(Deg2_neg, c(Deg1_neg, Deg3_neg))## [1] "Cdh1" "Ckap4" "St3gal5" "Zcchc24" "Txnrd3" "Neurl1b" "Pygm" "Dipk1b"
## [9] "Cd22" "Grb7" "Crhbp" "Prkcb" "Mki67" "Lta" "Strbp" "Cd248"
## [17] "Actn1" "Klhdc2" "Trp53i11" "Ramp1" "Itih5" "Nsg2" "Mgll" "Mier3"
## [25] "Ndrg1" "Adgrl1" "Tac4" "Nuak1" "Tyro3" "Lat" "Scube3" "Ptgs1"
## [33] "Nedd9" "Coro7" "Otub2" "Kifc5b" "Ptp4a3" "Dgkd" "Tox" "Gm36120"
## [41] "Vangl2" "Cd37" "Endou" "Atad5" "Plcl1" "Epb41l5" "Hs3st2" "Slc16a3"
## [49] "1700019D03Rik" "Ehd2" "Rgs7bp" "Def6" "Pycr1" "Ets1" "Satb1" "Jag2"
## [57] "Napg" "Ctnnbip1" "Irf4" "Faap100" "Loxl2" "Nfia" "Tcn2" "Shroom3"
## [65] "Gas2l3" "Scd1" "Bok" "Kdm7a" "Gm48768" "Irs2" "Pcx" "Cmah"
## [73] "Abi2" "Adgre5" "Gmpr" "Iqsec1" "Pola1" "Hbb-bt" "Card11" "Dusp7"
## [81] "Prdm8" "E2f7" "Pou2f1" "Hvcn1" "Icam2" "1700120C14Rik" "Ano10" "Igf2bp2"
## [89] "Mfsd4a" "Pecr" "Cit" "Emp1" "Egfl7" "Klrb1c" "Tmed6" "Smtnl2"
## [97] "Pik3c2a" "Atp2a3" "Bhlhe40" "Sema4b" "Nab2" "Tcf7l2" "Jcad" "Tbx19"
## [105] "Slc37a2" "Sema5a" "Brdt" "C730034F03Rik" "Nkd2" "C2cd5" "Abca2" "Arhgef1"
## [113] "Gm9227" "S100a11" "Slc47a1" "Kank2" "Ncs1" "BC049352" "Nek1" "Fam160b1"
## [121] "Abraxas1" "Pla2g2f" "Gm42047" "Gdf11" "Batf" "Ccdc47" "Crisp1" "Pygl"
## [129] "Ltb" "0610040J01Rik" "A730063M14Rik" "Stradb" "Cryl1" "Rtn1" "Ephx1" "Cdc42ep4"
## [137] "Accs" "Rbms3" "Slc44a1" "4632427E13Rik" "Rcan3" "St6gal1" "Ccpg1" "Rapgefl1"
## [145] "Tnfrsf9" "Glis2" "Slc17a8" "Bfsp2" "Tmem176b" "Fam8a1" "E130308A19Rik" "Tmem158"
## [153] "Mcm8" "Rapgef3" "Tsc22d3" "Rasl10a" "Chst15" "Syndig1l" "Map3k1" "Srgap3"
## [161] "Stox2" "H2ac6" "Vldlr" "Usp6nl" "Smpd1" "Slc25a40" "Kdm4c" "Abcd3"
## [169] "Mbd4" "Tnni2" "Plin3" "Fhl1" "Azin1" "Prxl2c" "Pcdh11x" "Hes1"
## [177] "Kif21b" "Cth" "Mybl1" "Armt1" "Izumo1r" "Cdhr2" "Mtss1" "Cbx7"
## [185] "Ccdc112" "Zfp821" "Cdhr4" "Smyd4" "Gm9993" "Cradd" "Uaca" "Gm36684"
## [193] "Gpt2" "Hook3" "Acp5" "Wdr91" "Arhgef17" "Gm11824" "Gfi1b" "Hid1"
## [201] "Gm26531" "Bclaf3" "Pcdh9" "Ier3" "Sel1l3" "Abtb1" "Gkap1" "Rtkn2"
## [209] "Sorcs2" "Qrfp" "Tasor2" "Apbb2" "Syngr1" "Rprm" "4933427D14Rik" "Ddx25"
## [217] "Zfp318" "Dalrd3" "Prpf40b" "Mpp3" "Ribc1" "Itga9" "Zyg11b" "Cavin1"
## [225] "Sult1d1" "Abcb4" "Gm867" "Angpt1" "Grip1" "Acy3" "Mpp7" "a"
## [233] "Vps13b" "Cpm" "Txnrd2" "Hs3st1" "Ltbp4" "Tulp4" "Chd3" "Pcbd1"
## [241] "Nme3" "Spats2" "Dusp23" "Slc7a3" "Tctn1" "Zfp820" "Phldb1" "Zfp956"
## [249] "Dync2h1" "Cdkal1" "Six5" "Ccdc88c" "Nup160" "Meis2" "Zan" "Gas2"
## [257] "Ramp2"
setdiff(Deg3_neg, c(Deg1_neg, Deg2_neg))## [1] "Utp20" "Snrpd3" "Dleu2" "Rbm19" "Slfn2" "Glb1l" "Lmo7" "Cep290" "Maged1" "Tbk1" "Svip"
## [12] "Rn7sk" "Kmt2d" "Iqce" "Gm47304" "Bmp2k" "Clock" "Eif4enif1" "Rmnd1" "Enc1" "Ap1ar" "Sema7a"
## [23] "Mir17hg" "Relch"
cid <- 0
###### volcano plots for the publication ######sessionInfo()
# saving session info
sink(file.path(results_dir, paste0("sessionInfo_", project_name,".txt")))
sessionInfo()
sink()
renv::snapshot(lockfile = file.path(base_dir, paste0(project_name, "_renv.lock")))
renv::status(lockfile = file.path(base_dir, paste0(project_name, "_renv.lock")))